hammer2 - Implement error processing and free reserve enforcement
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 7 Sep 2017 02:56:24 +0000 (19:56 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 7 Sep 2017 03:12:22 +0000 (20:12 -0700)
* newfs_hammer2 calculates the correct amount of reserved space.  We
  have to reserve 4MB per 1GB, not 4MB per 2GB, due to a snafu.  This
  is still only 0.4% of the storage.

* Flesh out HAMMER2_ERROR_* codes and make most hammer2 functions return
  a proper error code.

* Add error handling to nearly all code that can dirty a chain, in
  particular to handle ENOSPC issues.  Any dirty buffers that cannot be
  flushed will incur a write error (which in DragonFly typically causes
  the buffer to be retried later).  Any dirty chain that cannot be
  flushed will remain in the topology and can be completed in a later
  flush if space has been freed up.

  We try to avoid allowing the filesystem to get into this situation in
  the first place, but if it does, it should be possible to flush these
  asynchronous modifying chains and buffers once space is freed up via
  bulkfree.

* Relax class match requirements in the freemap allocator when the freemap
  gets close to full.  This will allow e.g. inodes to be allocated out of
  DATA bitmaps and vice versa, and so forth.  This fixes edge conditions
  where there is enough free space available but it has all been earmarked
  for the wrong data class.

* Try to fix a bug in live_count tracking when destroying an indirect
  block chain or inode chain that has not yet been blockmapped due to
  a drop.  This situation only occurs when chains cannot be flushed due
  to I/O errors or disk full conditions, and are then later destroyed
  (e.g. when the governing file is removed).

  This should fix a live_count assertion that can occur under these
  circumstances.  See hammer2_chain_lastdrop().

* Enforce the free reserve requirement for all modifying VOP calls.
  Root users can nominally fill the file system to 97.5%, non-root
  users to 95%.  At 90%, write()s will enforce bawrite() versus bdwrite()
  to try to avoid buffer cache flushes from actually running the
  filesystem out of space.

  This is needed because we do not actually know how much disk space is
  going to be needed at write() time.  Deduplication and compression
  occurs later, at buffer-flush time.

* Do NOT flush the volume header when a vfs sync is unable to completely
  flush a device due to errors.  This ensures that the underlying media
  does not become corrupt.

* Fix an issue where bref.check.freemap.bigmask was not being properly
  reset to -1 when bulkfree is able to free an element.  This bug
  prevented the allocator from recognizing that free space was available
  in that bitmap.

* Modify bulkfree operation to use the live topology when flushing and
  snapshot operations fail due to errors, allowing bulkfree to run.

* Nominal bulkfree operations now run on the snapshot without a
  transaction (more testing is needed).  This theoretically should allow
  bulkfree to run concurrently with just about any operation including
  flushes.

* Add a freespace tracking heuristic to reduce the overhead that modifying
  VOP calls incur in checking the free reserve requirement.

* hammer2 show dumps additional info for freemap nodes.

19 files changed:
sbin/hammer2/cmd_debug.c
sbin/newfs_hammer2/newfs_hammer2.c
sys/vfs/hammer2/TODO
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_admin.c
sys/vfs/hammer2/hammer2_bulkfree.c
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_cluster.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_iocom.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_strategy.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_synchro.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c
sys/vfs/hammer2/hammer2_xops.c

index 3fccc83..392f854 100644 (file)
@@ -481,6 +481,11 @@ show_bref(int fd, int tab, int bi, hammer2_blockref_t *bref, int dofreemap,
        tab += SHOW_TAB;
        if (bref->flags)
                printf("flags=%02x ", bref->flags);
+       if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
+           bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
+               printf("bigmask=%08x avail=%ld ",
+                       bref->check.freemap.bigmask, bref->check.freemap.avail);
+       }
 
        bytes = (bref->data_off & HAMMER2_OFF_MASK_RADIX);
        if (bytes)
index 81a519a..5d62ec6 100644 (file)
@@ -290,9 +290,14 @@ main(int ac, char **av)
         * We also include the boot and redo areas in the reserve.  The
         * reserve is used to help 'df' calculate the amount of available
         * space.
+        *
+        * XXX I kinda screwed up and made the reserved area on the LEVEL1
+        *     boundary rather than the ZONE boundary.  LEVEL1 is on 1GB
+        *     boundaries rather than 2GB boundaries.  Stick with the LEVEL1
+        *     boundary.
         */
-       reserved_space = ((total_space + HAMMER2_ZONE_MASK64) /
-                         HAMMER2_ZONE_BYTES64) * HAMMER2_ZONE_SEG64;
+       reserved_space = ((total_space + HAMMER2_FREEMAP_LEVEL1_MASK) /
+                         HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64;
 
        free_space = total_space - reserved_space -
                     BootAreaSize - AuxAreaSize;
index 0438c22..5a55094 100644 (file)
@@ -1,7 +1,4 @@
 
-* bulkfree pass needs to do a vchain flush from the root to avoid
-  accidently freeing live in-process chains.
-
 * Need backend synchronization / serialization when the frontend detaches
   a XOP.  modify_tid tests won't be enough, the backend may wind up executing
   the XOP out of order after the detach.
index 6aac09b..ea5fd9f 100644 (file)
@@ -395,14 +395,25 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
  *      NULL on other errors.  Check chain->error, not chain->data.
  */
 #define HAMMER2_ERROR_NONE             0       /* no error (must be 0) */
-#define HAMMER2_ERROR_IO               0x0001  /* device I/O error */
-#define HAMMER2_ERROR_CHECK            0x0002  /* check code mismatch */
-#define HAMMER2_ERROR_INCOMPLETE       0x0004  /* incomplete cluster */
-#define HAMMER2_ERROR_DEPTH            0x0008  /* temporary depth limit */
-#define HAMMER2_ERROR_BADBREF          0x0010  /* temporary depth limit */
-
-#define HAMMER2_ERROR_ABORTED          0x1000  /* aborted operation */
-#define HAMMER2_ERROR_EOF              0x2000  /* non-error end of scan */
+#define HAMMER2_ERROR_EIO              0x00000001      /* device I/O error */
+#define HAMMER2_ERROR_CHECK            0x00000002      /* check code error */
+#define HAMMER2_ERROR_INCOMPLETE       0x00000004      /* incomplete cluster */
+#define HAMMER2_ERROR_DEPTH            0x00000008      /* tmp depth limit */
+#define HAMMER2_ERROR_BADBREF          0x00000010      /* illegal bref */
+#define HAMMER2_ERROR_ENOSPC           0x00000020      /* allocation failure */
+#define HAMMER2_ERROR_ENOENT           0x00000040      /* entry not found */
+#define HAMMER2_ERROR_ENOTEMPTY                0x00000080      /* dir not empty */
+#define HAMMER2_ERROR_EAGAIN           0x00000100      /* retry */
+#define HAMMER2_ERROR_ENOTDIR          0x00000200      /* not directory */
+#define HAMMER2_ERROR_EISDIR           0x00000400      /* is directory */
+#define HAMMER2_ERROR_EINPROGRESS      0x00000800      /* already running */
+#define HAMMER2_ERROR_ABORTED          0x00001000      /* aborted operation */
+#define HAMMER2_ERROR_EOF              0x00002000      /* end of scan */
+#define HAMMER2_ERROR_EINVAL           0x00004000      /* catch-all */
+#define HAMMER2_ERROR_EEXIST           0x00008000      /* entry exists */
+#define HAMMER2_ERROR_EDEADLK          0x00010000
+#define HAMMER2_ERROR_ESRCH            0x00020000
+#define HAMMER2_ERROR_ETIMEDOUT                0x00040000
 
 /*
  * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
@@ -1053,6 +1064,7 @@ struct hammer2_dev {
        struct spinlock io_spin;        /* iotree, iolruq access */
        struct hammer2_io_tree iotree;
        int             iofree_count;
+       int             freemap_relaxed;
        hammer2_chain_t vchain;         /* anchor chain (topology) */
        hammer2_chain_t fchain;         /* anchor chain (freemap) */
        struct spinlock list_spin;
@@ -1065,6 +1077,7 @@ struct hammer2_dev {
        hammer2_dedup_t heur_dedup[HAMMER2_DEDUP_HEUR_SIZE];
        int             volhdrno;       /* last volhdrno written */
        uint32_t        hflags;         /* HMNT2 flags applicable to device */
+       hammer2_off_t   free_reserved;  /* nominal free reserved */
        hammer2_thread_t bfthr;         /* bulk-free thread */
        char            devrepname[64]; /* for kprintf */
        hammer2_ioc_bulkfree_t bflast;  /* stats for last bulkfree run */
@@ -1156,8 +1169,10 @@ struct hammer2_pfs {
        uint8_t                 pfs_mode;       /* operating mode PFSMODE */
        uint8_t                 unused01;
        uint8_t                 unused02;
-       int                     unused03;
+       int                     free_ticks;     /* free_* calculations */
        long                    inmem_inodes;
+       hammer2_off_t           free_reserved;
+       hammer2_off_t           free_nominal;
        uint32_t                inmem_dirty_chains;
        int                     count_lwinprog; /* logical write in prog */
        struct spinlock         list_spin;
@@ -1276,18 +1291,49 @@ int
 hammer2_error_to_errno(int error)
 {
        if (error) {
-               if (error & HAMMER2_ERROR_IO)
+               if (error & HAMMER2_ERROR_EIO)
                        error = EIO;
                else if (error & HAMMER2_ERROR_CHECK)
                        error = EDOM;
                else if (error & HAMMER2_ERROR_ABORTED)
                        error = EINTR;
+               else if (error & HAMMER2_ERROR_BADBREF)
+                       error = EIO;
+               else if (error & HAMMER2_ERROR_ENOSPC)
+                       error = ENOSPC;
+               else if (error & HAMMER2_ERROR_ENOENT)
+                       error = ENOENT;
+               else if (error & HAMMER2_ERROR_ENOTEMPTY)
+                       error = ENOTEMPTY;
+               else if (error & HAMMER2_ERROR_EAGAIN)
+                       error = EAGAIN;
+               else if (error & HAMMER2_ERROR_ENOTDIR)
+                       error = ENOTDIR;
+               else if (error & HAMMER2_ERROR_EISDIR)
+                       error = EISDIR;
+               else if (error & HAMMER2_ERROR_EINPROGRESS)
+                       error = EINPROGRESS;
                else
                        error = EDOM;
        }
        return error;
 }
 
+static __inline
+int
+hammer2_errno_to_error(int error)	/* translate an errno value into a HAMMER2_ERROR_* code */
+{
+       switch(error) {
+       case 0:				/* zero (no error) is returned unchanged */
+               return 0;
+       case EIO:			/* EIO has a dedicated HAMMER2_ERROR_EIO code */
+               return HAMMER2_ERROR_EIO;
+       case EINVAL:
+       default:			/* EINVAL and every other errno map to the EINVAL code */
+               return HAMMER2_ERROR_EINVAL;
+       }
+}
+
 extern struct vop_ops hammer2_vnode_vops;
 extern struct vop_ops hammer2_spec_vops;
 extern struct vop_ops hammer2_fifo_vops;
@@ -1432,11 +1478,11 @@ int hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum,
                                int clindex, int flags,
                                hammer2_chain_t **parentp,
                                hammer2_chain_t **chainp);
-void hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
+int hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                                hammer2_off_t dedup_off, int flags);
-void hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
+int hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
                                hammer2_tid_t mtid, int flags);
-void hammer2_chain_resize(hammer2_chain_t *chain,
+int hammer2_chain_resize(hammer2_chain_t *chain,
                                hammer2_tid_t mtid, hammer2_off_t dedup_off,
                                int nradix, int flags);
 void hammer2_chain_unlock(hammer2_chain_t *chain);
@@ -1471,7 +1517,7 @@ void hammer2_chain_rename(hammer2_blockref_t *bref,
                                hammer2_chain_t **parentp,
                                hammer2_chain_t *chain,
                                hammer2_tid_t mtid, int flags);
-void hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
+int hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
                                hammer2_tid_t mtid, int flags);
 void hammer2_chain_setflush(hammer2_chain_t *chain);
 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
@@ -1498,7 +1544,7 @@ void hammer2_base_insert(hammer2_chain_t *chain,
 /*
  * hammer2_flush.c
  */
-void hammer2_flush(hammer2_chain_t *chain, int istop);
+int hammer2_flush(hammer2_chain_t *chain, int istop);
 void hammer2_delayed_flush(hammer2_chain_t *chain);
 
 /*
@@ -1629,6 +1675,8 @@ int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
 void hammer2_volconf_update(hammer2_dev_t *hmp, int index);
 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx);
 int hammer2_vfs_sync(struct mount *mp, int waitflags);
+int hammer2_vfs_enospace(hammer2_inode_t *ip, off_t bytes, struct ucred *cred);
+
 hammer2_pfs_t *hammer2_pfsalloc(hammer2_chain_t *chain,
                                const hammer2_inode_data_t *ripdata,
                                hammer2_tid_t modify_tid,
index a50640c..5afaf6e 100644 (file)
@@ -145,8 +145,10 @@ hammer2_thr_wait_any(hammer2_thread_t *thr, uint32_t flags, int timo)
                        error = tsleep(&thr->flags, PINTERLOCKED,
                                       "h2twait", timo);
                }
-               if (error == ETIMEDOUT)
+               if (error == ETIMEDOUT) {
+                       error = HAMMER2_ERROR_ETIMEDOUT;
                        break;
+               }
        }
        return error;
 }
@@ -653,7 +655,7 @@ hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
         * Early termination (typicaly of xop_readir)
         */
        if (hammer2_xop_active(xop) == 0) {
-               error = EINTR;
+               error = HAMMER2_ERROR_ABORTED;
                goto done;
        }
 
@@ -669,7 +671,7 @@ hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
                atomic_set_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
                mask = xop->run_mask;
                if ((mask & HAMMER2_XOPMASK_VOP) == 0) {
-                       error = EINTR;
+                       error = HAMMER2_ERROR_ABORTED;
                        goto done;
                }
                tsleep_interlock(xop, 0);
@@ -829,11 +831,11 @@ loop:
         */
        if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
            xop->run_mask != HAMMER2_XOPMASK_VOP) {
-               error = EINPROGRESS;
+               error = HAMMER2_ERROR_EINPROGRESS;
        } else {
                error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
        }
-       if (error == EINPROGRESS) {
+       if (error == HAMMER2_ERROR_EINPROGRESS) {
                if ((flags & HAMMER2_XOP_COLLECT_NOWAIT) == 0)
                        tsleep_interlock(&xop->check_counter, 0);
                if (atomic_cmpset_int(&xop->check_counter,
@@ -845,21 +847,21 @@ loop:
                }
                goto loop;
        }
-       if (error == ESRCH) {
+       if (error == HAMMER2_ERROR_ESRCH) {
                if (lokey != HAMMER2_KEY_MAX) {
                        xop->collect_key = lokey + 1;
                        goto loop;
                }
-               error = ENOENT;
+               error = HAMMER2_ERROR_ENOENT;
        }
-       if (error == EDEADLK) {
+       if (error == HAMMER2_ERROR_EDEADLK) {
                kprintf("hammer2: no quorum possible lokey %016jx\n",
                        lokey);
                if (lokey != HAMMER2_KEY_MAX) {
                        xop->collect_key = lokey + 1;
                        goto loop;
                }
-               error = ENOENT;
+               error = HAMMER2_ERROR_ENOENT;
        }
        if (lokey == HAMMER2_KEY_MAX)
                xop->collect_key = lokey;
index 38514a6..f6987cd 100644 (file)
 
 #include "hammer2.h"
 
+/*
+ * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
+ *     which is on a 1GB demark.  This will eat a little more space but for
+ *     now we retain compatibility and make FMZONEBASE every 1GB
+ */
+#define H2FMZONEBASE(key)         ((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)
 #define H2FMBASE(key, radix)    ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
 #define H2FMSHIFT(radix)        ((hammer2_off_t)1 << (radix))
 
@@ -184,10 +190,14 @@ hammer2_bulk_scan(hammer2_chain_t *parent,
                                int savepri = info->pri;
 
                                hammer2_chain_unlock(chain);
+                               hammer2_chain_unlock(parent);
                                info->pri = 0;
                                rup_error |=
                                        hammer2_bulk_scan(chain, func, info);
                                info->pri += savepri;
+                               hammer2_chain_lock(parent,
+                                                  HAMMER2_RESOLVE_ALWAYS |
+                                                  HAMMER2_RESOLVE_SHARED);
                                hammer2_chain_lock(chain,
                                                   HAMMER2_RESOLVE_ALWAYS |
                                                   HAMMER2_RESOLVE_SHARED);
@@ -488,11 +498,11 @@ cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
 
        bzero(bmap, size);
        while (size) {
-               if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
-                           HAMMER2_ZONE_SEG64) {
-                       lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
-                               HAMMER2_ZONE_SEG64;
-               }
+               bzero(bmap, sizeof(*bmap));
+               if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
+                       lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
+               if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
+                       lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
                if (key < lokey || key >= hikey) {
                         memset(bmap->bitmapq, -1,
                                sizeof(bmap->bitmapq));
@@ -554,7 +564,7 @@ h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
        bytes = (size_t)1 << radix;
        class = (bref->type << 8) | hammer2_devblkradix(radix);
 
-       if (data_off + bytes >= cbinfo->sstop) {
+       if (data_off + bytes > cbinfo->sstop) {
                kprintf("hammer2_bulkfree_scan: illegal 2GB boundary "
                        "%016jx %016jx/%d\n",
                        (intmax_t)bref->data_off,
@@ -590,6 +600,15 @@ h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
                bmap->class = class;
                bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
        }
+
+       /*
+        * NOTE: bmap->class does not have to match class.  Classification
+        *       is relaxed when free space is low, so some mixing can occur.
+        */
+#if 0
+       /*
+        * XXX removed
+        */
        if (bmap->class != class) {
                kprintf("hammer2_bulkfree_scan: illegal mixed class "
                        "%016jx %016jx/%d (%04x vs %04x)\n",
@@ -598,6 +617,7 @@ h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
                        bref->keybits,
                        class, bmap->class);
        }
+#endif
 
        /*
         * Just record the highest byte-granular offset for now.  Do not
@@ -666,6 +686,7 @@ h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
  *        11             10 -> 11      handles race against live
  *                       ** -> 11      nominally warn of corruption
  * 
+ * We must also fixup the hints in HAMMER2_BREF_TYPE_FREEMAP_LEAF.
  */
 static int
 h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
@@ -799,6 +820,8 @@ h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
                }
 
                hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0);
+               live_chain->bref.check.freemap.bigmask = -1;
+               cbinfo->hmp->freemap_relaxed = 0;       /* reset heuristic */
                live = &live_chain->data->bmdata[bmapindex];
 
                h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
index 0d2ff2a..a225eb1 100644 (file)
@@ -331,7 +331,7 @@ hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
         */
        if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
            parent->core.generation != generation) {
-               error = EAGAIN;
+               error = HAMMER2_ERROR_EAGAIN;
                goto failed;
        }
 
@@ -742,8 +742,19 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain)
                 * 1->0 transition successful, parent spin held to prevent
                 * new lookups, chain spinlock held to protect parent field.
                 * Remove chain from the parent.
+                *
+                * If the chain is being removed from the parent's btree but
+                * is not bmapped, we have to adjust live_count downward.  If
+                * it is bmapped then the blockref is retained in the parent
+                * as is its associated live_count.  This case can occur when
+                * a chain added to the topology is unable to flush and is
+                * then later deleted.
                 */
                if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
+                       if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) &&
+                           (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
+                               atomic_add_int(&parent->core.live_count, -1);
+                       }
                        RB_REMOVE(hammer2_chain_tree,
                                  &parent->core.rbtree, chain);
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
@@ -1042,12 +1053,12 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
        int error;
 
        /*
-        * Degenerate case, data already present, or chain is not expected
-        * to have any data.
+        * Degenerate case, data already present, or chain has no media
+        * reference to load.
         */
        if (chain->data)
                return;
-       if ((chain->bref.data_off & HAMMER2_OFF_MASK_RADIX) == 0)
+       if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
                return;
 
        hmp = chain->hmp;
@@ -1117,7 +1128,7 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
                hammer2_adjreadcounter(&chain->bref, chain->bytes);
        }
        if (error) {
-               chain->error = HAMMER2_ERROR_IO;
+               chain->error = HAMMER2_ERROR_EIO;
                kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
                        (intmax_t)bref->data_off, error);
                hammer2_io_bqrelse(&chain->dio);
@@ -1390,6 +1401,9 @@ hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp)
  * also calculates the point at which all remaining blockrefs are empty.
  * This routine can only be called on a live chain.
  *
+ * Caller holds the chain locked, but possibly with a shared lock.  We
+ * must use an exclusive spinlock to prevent corruption.
+ *
  * NOTE: Flag is not set until after the count is complete, allowing
  *      callers to test the flag without holding the spinlock.
  *
@@ -1449,7 +1463,7 @@ hammer2_chain_countbrefs(hammer2_chain_t *chain,
  *
  * XXX return error if cannot resize.
  */
-void
+int
 hammer2_chain_resize(hammer2_chain_t *chain,
                     hammer2_tid_t mtid, hammer2_off_t dedup_off,
                     int nradix, int flags)
@@ -1457,6 +1471,7 @@ hammer2_chain_resize(hammer2_chain_t *chain,
        hammer2_dev_t *hmp;
        size_t obytes;
        size_t nbytes;
+       int error;
 
        hmp = chain->hmp;
 
@@ -1475,7 +1490,7 @@ hammer2_chain_resize(hammer2_chain_t *chain,
        obytes = chain->bytes;
        nbytes = (nradix) ? (1U << nradix) : 0;
        if (obytes == nbytes)
-               return;
+               return (chain->error);
 
        /*
         * Make sure the old data is instantiated so we can copy it.  If this
@@ -1485,7 +1500,9 @@ hammer2_chain_resize(hammer2_chain_t *chain,
         *
         * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
         */
-       hammer2_chain_modify(chain, mtid, dedup_off, 0);
+       error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
+       if (error)
+               return error;
 
        /*
         * Relocate the block, even if making it smaller (because different
@@ -1495,7 +1512,10 @@ hammer2_chain_resize(hammer2_chain_t *chain,
         *        to resize data blocks in-place, or directory entry blocks
         *        which are about to be modified in some manner.
         */
-       hammer2_freemap_alloc(chain, nbytes);
+       error = hammer2_freemap_alloc(chain, nbytes);
+       if (error)
+               return error;
+
        chain->bytes = nbytes;
 
        /*
@@ -1510,6 +1530,7 @@ hammer2_chain_resize(hammer2_chain_t *chain,
                hammer2_io_brelse(&chain->dio);
                chain->data = NULL;
        }
+       return (chain->error);
 }
 
 /*
@@ -1522,6 +1543,12 @@ hammer2_chain_resize(hammer2_chain_t *chain,
  * is a CLC (cluster level change) field and is not updated by parent
  * propagation during a flush.
  *
+ * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
+ * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
+ * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
+ * remains unmodified with its old data ref intact and chain->error
+ * unchanged.
+ *
  *                              Dedup Handling
  *
  * If the DEDUPABLE flag is set in the chain the storage must be reallocated
@@ -1533,7 +1560,7 @@ hammer2_chain_resize(hammer2_chain_t *chain,
  * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
  * must not modify the data content upon return.
  */
-void
+int
 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                     hammer2_off_t dedup_off, int flags)
 {
@@ -1542,6 +1569,8 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
        hammer2_io_t *dio;
        int error;
        int wasinitial;
+       int setmodified;
+       int setupdate;
        int newmod;
        char *bdata;
 
@@ -1561,13 +1590,17 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
 
        /*
         * Data must be resolved if already assigned, unless explicitly
-        * flagged otherwise.
+        * flagged otherwise.  If we cannot safety load the data the
+        * modification fails and we return early.
         */
        if (chain->data == NULL && chain->bytes != 0 &&
            (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
            (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
                hammer2_chain_load_data(chain);
+               if (chain->error)
+                       return (chain->error);
        }
+       error = 0;
 
        /*
         * Set MODIFIED to indicate that the chain has been modified.  A new
@@ -1575,7 +1608,6 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
         *
         * Set UPDATE to ensure that the blockref is updated in the parent.
         *
-        *
         * If MODIFIED is already set determine if we can reuse the assigned
         * data block or if we need a new data block.
         */
@@ -1586,6 +1618,7 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                atomic_add_long(&hammer2_count_modified_chains, 1);
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                hammer2_pfs_memory_inc(chain->pmp);  /* can be NULL */
+               setmodified = 1;
 
                /*
                 * We may be able to avoid a copy-on-write if the chain's
@@ -1624,30 +1657,38 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                 * cache).
                 */
                newmod = 1;
+               setmodified = 0;
        } else {
                /*
                 * Already flagged modified, no new allocation is needed.
                 */
                newmod = 0;
+               setmodified = 0;
        }
 
        /*
         * Flag parent update required.
         */
-       if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0)
+       if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
+               setupdate = 1;
+       } else {
+               setupdate = 0;
+       }
 
        /*
         * The modification or re-modification requires an allocation and
-        * possible COW.
+        * possible COW.  If an error occurs, the previous content and data
+        * reference is retained and the modification fails.
+        *
+        * If dedup_off is non-zero, the caller is requesting a deduplication
+        * rather than a modification.  The MODIFIED bit is not set and the
+        * data offset is set to the deduplication offset.  The data cannot
+        * be modified.
         *
-        * If dedup_off is non-zero, caller already has a data offset
-        * containing the caller's desired data.  The dedup offset is
-        * allowed to be in a partially free state and we must be sure
-        * to reset it to a fully allocated state to force two bulkfree
-        * passes to free it again.  The chain will not be marked MODIFIED
-        * in the dedup case, as the dedup data cannot be changed without
-        * a new allocation.
+        * NOTE: The dedup offset is allowed to be in a partially free state
+        *       and we must be sure to reset it to a fully allocated state
+        *       to force two bulkfree passes to free it again.
         *
         * NOTE: Only applicable when chain->bytes != 0.
         *
@@ -1669,6 +1710,7 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                                chain->bref.data_off = dedup_off;
                                chain->bytes = 1 << (dedup_off &
                                                     HAMMER2_OFF_MASK_RADIX);
+                               chain->error = 0;
                                atomic_clear_int(&chain->flags,
                                                 HAMMER2_CHAIN_MODIFIED);
                                atomic_add_long(&hammer2_count_modified_chains,
@@ -1680,14 +1722,31 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                                atomic_set_int(&chain->flags,
                                                HAMMER2_CHAIN_DEDUPABLE);
                        } else {
-                               hammer2_freemap_alloc(chain, chain->bytes);
+                               error = hammer2_freemap_alloc(chain,
+                                                             chain->bytes);
                                atomic_clear_int(&chain->flags,
                                                HAMMER2_CHAIN_DEDUPABLE);
                        }
-                       /* XXX failed allocation */
                }
        }
 
+       /*
+        * Stop here if error.  We have to undo any flag bits we might
+        * have set above.
+        */
+       if (error) {
+               if (setmodified) {
+                       atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
+                       atomic_add_long(&hammer2_count_modified_chains, -1);
+                       if (chain->pmp)
+                               hammer2_pfs_memory_wakeup(chain->pmp);
+               }
+               if (setupdate) {
+                       atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
+               }
+               return error;
+       }
+
        /*
         * Update mirror_tid and modify_tid.  modify_tid is only updated
         * if not passed as zero (during flushes, parent propagation passes
@@ -1798,7 +1857,7 @@ hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                if (error) {
                        kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
                                hmp);
-                       chain->error = HAMMER2_ERROR_IO;
+                       chain->error = HAMMER2_ERROR_EIO;
                        hammer2_io_brelse(&dio);
                        hammer2_io_brelse(&chain->dio);
                        chain->data = NULL;
@@ -1857,17 +1916,22 @@ skip2:
         */
        if (chain->parent)
                hammer2_chain_setflush(chain->parent);
+       return (chain->error);
 }
 
 /*
  * Modify the chain associated with an inode.
  */
-void
+int
 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
                        hammer2_tid_t mtid, int flags)
 {
+       int error;
+
        hammer2_inode_modify(ip);
-       hammer2_chain_modify(chain, mtid, 0, flags);
+       error = hammer2_chain_modify(chain, mtid, 0, flags);
+
+       return error;
 }
 
 /*
@@ -3208,6 +3272,7 @@ again:
        if (chain->parent != NULL)
                panic("hammer2: hammer2_chain_create: chain already connected");
        KKASSERT(chain->parent == NULL);
+       KKASSERT(parent->core.live_count < count);
        hammer2_chain_insert(parent, chain,
                             HAMMER2_CHAIN_INSERT_SPIN |
                             HAMMER2_CHAIN_INSERT_LIVE,
@@ -3233,8 +3298,8 @@ again:
                case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
                case HAMMER2_BREF_TYPE_DIRENT:
                case HAMMER2_BREF_TYPE_INODE:
-                       hammer2_chain_modify(chain, mtid, dedup_off,
-                                            HAMMER2_MODIFY_OPTDATA);
+                       error = hammer2_chain_modify(chain, mtid, dedup_off,
+                                                    HAMMER2_MODIFY_OPTDATA);
                        break;
                default:
                        /*
@@ -3367,11 +3432,12 @@ hammer2_chain_rename(hammer2_blockref_t *bref,
  *
  * parent may not be errored.  chain can be errored.
  */
-static void
+static int
 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
                             hammer2_tid_t mtid, int flags)
 {
        hammer2_dev_t *hmp;
+       int error = 0;
 
        KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED |
                                  HAMMER2_CHAIN_FICTITIOUS)) == 0);
@@ -3391,7 +3457,9 @@ _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
                KKASSERT(parent != NULL);
                KKASSERT(parent->error == 0);
                KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
-               hammer2_chain_modify(parent, mtid, 0, 0);
+               error = hammer2_chain_modify(parent, mtid, 0, 0);
+               if (error)
+                       goto done;
 
                /*
                 * Calculate blockmap pointer
@@ -3495,6 +3563,8 @@ _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
                 */
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
        }
+done:
+       return error;
 }
 
 /*
@@ -3503,7 +3573,7 @@ _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
  * ref changes or returns the new indirect block locked and referenced
  * and leaving the original parent lock/ref intact as well.
  *
- * If an error occurs, NULL is returned and *errorp is set to the error.
+ * If an error occurs, NULL is returned and *errorp is set to the H2 error.
  *
  * The returned chain depends on where the specified key falls.
  *
@@ -3573,6 +3643,7 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
        int ncount;
        int nbytes;
        int loops;
+       int error;
        int reason;
        int generation;
        int maxloops = 300000;
@@ -3583,9 +3654,19 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
         * for RB lookups either way.
         */
        hmp = parent->hmp;
-       *errorp = 0;
        KKASSERT(hammer2_mtx_owned(&parent->lock));
 
+       /*
+        * Pre-modify the parent now to avoid having to deal with error
+        * processing if we tried to later (in the middle of our loop).
+        */
+       *errorp = hammer2_chain_modify(parent, mtid, 0, 0);
+       if (*errorp) {
+               kprintf("hammer2_create_indirect: error %08x %s\n",
+                       *errorp, hammer2_error_str(*errorp));
+               return NULL;
+       }
+
        /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/
        base = hammer2_chain_base_and_count(parent, &count);
 
@@ -3704,7 +3785,14 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
         * OPTDATA to allow it to remain in the INITIAL state.  Otherwise
         * it won't be acted upon by the flush code.
         */
-       hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA);
+       *errorp = hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA);
+       if (*errorp) {
+               kprintf("hammer2_alloc_indirect: error %08x %s\n",
+                       *errorp, hammer2_error_str(*errorp));
+               hammer2_chain_unlock(ichain);
+               hammer2_chain_drop(ichain);
+               return NULL;
+       }
 
        /*
         * Iterate the original parent and move the matching brefs into
@@ -3785,7 +3873,6 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
                                continue;
                        }
                        if (bcmp(&bcopy, bref, sizeof(bcopy))) {
-                               kprintf("REASON 2\n");
                                reason = 2;
                                hammer2_chain_drop(chain);
                                hammer2_spin_ex(&parent->core.spin);
@@ -3825,8 +3912,12 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
                 *
                 * WARNING! The (parent, chain) deletion may modify the parent
                 *          and invalidate the base pointer.
+                *
+                * WARNING! Parent must already be marked modified, so we
+                *          can assume that chain_delete always succeeds.
                 */
-               hammer2_chain_delete(parent, chain, mtid, 0);
+               error = hammer2_chain_delete(parent, chain, mtid, 0);
+               KKASSERT(error == 0);
                hammer2_chain_rename(NULL, &ichain, chain, mtid, 0);
                hammer2_chain_unlock(chain);
                hammer2_chain_drop(chain);
@@ -3859,6 +3950,7 @@ next_key_spinlocked:
         */
        base = NULL;    /* safety, parent modify may change address */
        KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
+       KKASSERT(parent->core.live_count < count);
        hammer2_chain_insert(parent, ichain,
                             HAMMER2_CHAIN_INSERT_SPIN |
                             HAMMER2_CHAIN_INSERT_LIVE,
@@ -4501,10 +4593,12 @@ hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
  * Also note that the flusher is responsible for cleaning up empty
  * indirect blocks.
  */
-void
+int
 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
                     hammer2_tid_t mtid, int flags)
 {
+       int error = 0;
+
        KKASSERT(hammer2_mtx_owned(&chain->lock));
 
        /*
@@ -4516,15 +4610,20 @@ hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
        if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
                KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
                         chain->parent == parent);
-               _hammer2_chain_delete_helper(parent, chain, mtid, flags);
+               error = _hammer2_chain_delete_helper(parent, chain,
+                                                    mtid, flags);
        }
 
        /*
         * Permanent deletions mark the chain as destroyed.
         */
-       if (flags & HAMMER2_DELETE_PERMANENT)
-               atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
-       hammer2_chain_setflush(chain);
+       if (error == 0) {
+               if (flags & HAMMER2_DELETE_PERMANENT)
+                       atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
+               hammer2_chain_setflush(chain);
+       }
+
+       return error;
 }
 
 /*
@@ -5268,7 +5367,7 @@ hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum,
                                              inum, inum,
                                              &error, flags);
        } else {
-               error = HAMMER2_ERROR_IO;
+               error = HAMMER2_ERROR_EIO;
        }
        *parentp = parent;
        *chainp = rchain;
@@ -5372,7 +5471,8 @@ hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp,
        if (nip) {
                hammer2_inode_modify(nip);
                nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
-               hammer2_chain_modify(nchain, mtid, 0, 0);
+               error = hammer2_chain_modify(nchain, mtid, 0, 0);
+               KKASSERT(error == 0);
                wipdata = &nchain->data->ipdata;
 
                nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
@@ -5380,12 +5480,12 @@ hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp,
                nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
                kern_uuidgen(&nip->meta.pfs_fsid, 1);
 
+#if 0
                /*
                 * Give the snapshot its own private cluster id.  As a
                 * snapshot no further synchronization with the original
                 * cluster will be done.
                 */
-#if 0
                if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY)
                        nip->meta.pfs_clid = opfs_clid;
                else
index 735f50a..1c21f8a 100644 (file)
@@ -908,10 +908,10 @@ hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
         */
        if (nmasters < nquorum) {
                if (nmasters + umasters >= nquorum)
-                       return EINPROGRESS;
+                       return HAMMER2_ERROR_EINPROGRESS;
                if (nmasters_keymatch < nquorum) 
-                       return ESRCH;
-               return EDEADLK;
+                       return HAMMER2_ERROR_ESRCH;
+               return HAMMER2_ERROR_EDEADLK;
        }
 
        /*
@@ -919,7 +919,7 @@ hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
         */
        if (flags & HAMMER2_CHECK_NULL) {
                if (cluster->error == 0)
-                       cluster->error = ENOENT;
+                       cluster->error = HAMMER2_ERROR_ENOENT;
                return cluster->error;
        }
 
@@ -928,7 +928,7 @@ hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
         * had chain errors.
         */
        if (cluster->focus == NULL)
-               return EIO;
+               return HAMMER2_ERROR_EIO;
 
        /*
         * Pass 3
index a679b4a..440154d 100644 (file)
@@ -68,7 +68,7 @@ struct hammer2_flush_info {
        hammer2_chain_t *parent;
        int             depth;
        int             diddeferral;
-       int             unused01;
+       int             error;                  /* cumulative error */
        int             flags;
        struct h2_flush_list flushq;
        hammer2_chain_t *debug;
@@ -314,6 +314,9 @@ hammer2_delayed_flush(hammer2_chain_t *chain)
  * part of this propagation, mirror_tid and inode/data usage statistics
  * propagates back upward.
  *
+ * Returns a HAMMER2 error code, 0 if no error.  Note that I/O errors from
+ * buffers dirtied during the flush operation can occur later.
+ *
  * modify_tid (clc - cluster level change) is not propagated.
  *
  * update_tid (clc) is used for validation and is not propagated by this
@@ -326,7 +329,7 @@ hammer2_delayed_flush(hammer2_chain_t *chain)
  * UPDATE flag indicates that its parent's block table (which is not yet
  * part of the flush) should be updated.
  */
-void
+int
 hammer2_flush(hammer2_chain_t *chain, int flags)
 {
        hammer2_chain_t *scan;
@@ -393,9 +396,13 @@ hammer2_flush(hammer2_chain_t *chain, int flags)
                        if (hammer2_debug & 0x0040)
                                kprintf("deferred flush %p\n", scan);
                        hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
-                       hammer2_flush(scan, flags & ~HAMMER2_FLUSH_TOP);
-                       hammer2_chain_unlock(scan);
-                       hammer2_chain_drop(scan);       /* ref from deferral */
+                       if (scan->error == 0) {
+                               hammer2_flush(scan, flags & ~HAMMER2_FLUSH_TOP);
+                               hammer2_chain_unlock(scan);
+                               hammer2_chain_drop(scan);/* ref from defer */
+                       } else {
+                               info.error |= scan->error;
+                       }
                }
 
                /*
@@ -420,6 +427,7 @@ hammer2_flush(hammer2_chain_t *chain, int flags)
        hammer2_chain_drop(chain);
        if (info.parent)
                hammer2_chain_drop(info.parent);
+       return (info.error);
 }
 
 /*
@@ -470,6 +478,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
        hammer2_chain_t *parent;
        hammer2_dev_t *hmp;
        int diddeferral;
+       int save_error;
 
        /*
         * (1) Optimize downward recursion to locate nodes needing action.
@@ -536,8 +545,9 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                                   HAMMER2_CHAIN_DESTROY)) {
                /*
                 * Downward recursion search (actual flush occurs bottom-up).
-                * pre-clear ONFLUSH.  It can get set again due to races,
-                * which we want so the scan finds us again in the next flush.
+                * pre-clear ONFLUSH.  It can get set again due to races or
+                * flush errors, which we want so the scan finds us again in
+                * the next flush.
                 *
                 * We must also recurse if DESTROY is set so we can finally
                 * get rid of the related children, otherwise the node will
@@ -548,12 +558,23 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                 *           to be ripped up.
                 */
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
+               save_error = info->error;
+               info->error = 0;
                info->parent = chain;
                hammer2_spin_ex(&chain->core.spin);
                RB_SCAN(hammer2_chain_tree, &chain->core.rbtree,
                        NULL, hammer2_flush_recurse, info);
                hammer2_spin_unex(&chain->core.spin);
                info->parent = parent;
+
+               /*
+                * Re-set the flush bits if the flush was incomplete or
+                * an error occurred.  If an error occurs it is typically
+                * an allocation error.  Errors do not cause deferrals.
+                */
+               if (info->error)
+                       hammer2_chain_setflush(chain);
+               info->error |= save_error;
                if (info->diddeferral)
                        hammer2_chain_setflush(chain);
 
@@ -573,7 +594,12 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
        /*
         * Now we are in the bottom-up part of the recursion.
         *
-        * Do not update chain if lower layers were deferred.
+        * Do not update chain if lower layers were deferred.  We continue
+        * to try to update the chain on lower-level errors, but the flush
+        * code may decide not to flush the volume root.
+        *
+        * XXX should we continue to try to update the chain if an error
+        *     occurred?
         */
        if (info->diddeferral)
                goto done;
@@ -590,6 +616,20 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
        if (parent)
                hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
        hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE);
+
+       /*
+        * Can't process if we can't access their content.
+        */
+       if ((parent && parent->error) || chain->error) {
+               kprintf("hammer2: chain error during flush\n");
+               info->error |= chain->error;
+               if (parent) {
+                       info->error |= parent->error;
+                       hammer2_chain_unlock(parent);
+               }
+               goto done;
+       }
+
        if (chain->parent != parent) {
                kprintf("LOST CHILD3 %p->%p (actual parent %p)\n",
                        parent, chain, chain->parent);
@@ -631,14 +671,14 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                }
        }
 
+       /*
+        * Dispose of the modified bit.
+        *
+        * If parent is present, the UPDATE bit should already be set.
+        * UPDATE should already be set.
+        * bref.mirror_tid should already be set.
+        */
        if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
-               /*
-                * Dispose of the modified bit.
-                *
-                * If parent is present, the UPDATE bit should already be set.
-                * UPDATE should already be set.
-                * bref.mirror_tid should already be set.
-                */
                KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) ||
                         chain->parent == NULL);
                if (hammer2_debug & 0x800000) {
@@ -742,6 +782,9 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                         * We must still hold fchain locked while copying
                         * voldata to volsync, however.
                         *
+                        * These do not error per se since their data does
+                        * not need to be re-read from media on lock.
+                        *
                         * (note: embedded data, do not call setdirty)
                         */
                        hammer2_chain_lock(&hmp->fchain,
@@ -898,6 +941,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
 
        /*
         * If UPDATE is set the parent block table may need to be updated.
+        * This can fail if the hammer2_chain_modify() fails.
         *
         * NOTE: UPDATE may be set on vchain or fchain in which case
         *       parent could be NULL.  It's easiest to allow the case
@@ -922,8 +966,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                 * Clear UPDATE flag, mark parent modified, update its
                 * modify_tid if necessary, and adjust the parent blockmap.
                 */
-               if (chain->flags & HAMMER2_CHAIN_UPDATE)
-                       atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
+               atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
 
                /*
                 * (optional code)
@@ -979,9 +1022,26 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
 
                /*
                 * We are updating the parent's blockmap, the parent must
-                * be set modified.
+                * be set modified.  If this fails we re-set the UPDATE flag
+                * in the child.
+                *
+                * NOTE! A modification error can be ENOSPC.  We still want
+                *       to flush modified chains recursively, not break out,
+                *       so we just skip the update in this situation and
+                *       continue.  That is, we still need to try to clean
+                *       out dirty chains and buffers.
+                *
+                *       This may not help bulkfree though. XXX
                 */
-               hammer2_chain_modify(parent, 0, 0, 0);
+               save_error = hammer2_chain_modify(parent, 0, 0, 0);
+               if (save_error) {
+                       info->error |= save_error;
+                       kprintf("hammer2_flush: %016jx.%02x error=%08x\n",
+                               parent->bref.data_off, parent->bref.type,
+                               save_error);
+                       atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
+                       goto skipupdate;
+               }
                if (parent->bref.modify_tid < chain->bref.modify_tid)
                        parent->bref.modify_tid = chain->bref.modify_tid;
 
@@ -1075,6 +1135,8 @@ done:
  * by sync_tid.  Set info->domodify if the child's blockref must propagate
  * back up to the parent.
  *
+ * This function may set info->error as a side effect.
+ *
  * Ripouts can move child from rbtree to dbtree or dbq but the caller's
  * flush scan order prevents any chains from being lost.  A child can be
  * executes more than once.
@@ -1100,6 +1162,9 @@ hammer2_flush_recurse(hammer2_chain_t *child, void *data)
         * The caller has added a ref to the parent so we can temporarily
         * unlock it in order to lock the child.  However, if it no longer
         * winds up being the child of the parent we must skip this child.
+        *
+        * NOTE! chain locking errors are fatal.  They are never out-of-space
+        *       errors.
         */
        hammer2_chain_ref(child);
        hammer2_spin_unex(&parent->core.spin);
@@ -1111,6 +1176,12 @@ hammer2_flush_recurse(hammer2_chain_t *child, void *data)
                        parent, child, child->parent);
                goto done;
        }
+       if (child->error) {
+               kprintf("CHILD ERROR DURING FLUSH LOCK %p->%p\n",
+                       parent, child);
+               info->error |= child->error;
+               goto done;
+       }
 
        /*
         * Must propagate the DESTROY flag downwards, otherwise the
@@ -1141,10 +1212,15 @@ hammer2_flush_recurse(hammer2_chain_t *child, void *data)
 
 done:
        /*
-        * Relock to continue the loop
+        * Relock to continue the loop.
         */
        hammer2_chain_unlock(child);
        hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
+       if (parent->error) {
+               kprintf("PARENT ERROR DURING FLUSH LOCK %p->%p\n",
+                       parent, child);
+               info->error |= parent->error;
+       }
        hammer2_chain_drop(child);
        KKASSERT(info->parent == parent);
        hammer2_spin_ex(&parent->core.spin);
@@ -1152,45 +1228,6 @@ done:
        return (0);
 }
 
-#if 0
-/*
- * Flush helper (direct)
- *
- * Quickly flushes any dirty chains for a device and returns a temporary
- * out-of-band copy of hmp->vchain that the caller can use as a stable
- * reference.
- *
- * This function does not flush the actual volume root and does not flush dirty
- * device buffers.  We don't care about pending work, per-say.  This function
- * is primarily used by the bulkfree code to create a stable snapshot of
- * the block tree.
- */
-hammer2_chain_t *
-hammer2_flush_quick(hammer2_dev_t *hmp)
-{
-       hammer2_chain_t *chain;
-       hammer2_chain_t *copy;
-
-       hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
-
-       hammer2_chain_ref(&hmp->vchain);
-       hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
-       if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
-               chain = &hmp->vchain;
-               hammer2_flush(chain, HAMMER2_FLUSH_TOP |
-                                    HAMMER2_FLUSH_ALL);
-               KKASSERT(chain == &hmp->vchain);
-       }
-       copy = hammer2_chain_bulksnap(&hmp->vchain);
-       hammer2_chain_unlock(&hmp->vchain);
-       hammer2_chain_drop(&hmp->vchain);
-
-       hammer2_trans_done(hmp->spmp);  /* spmp trans */
-
-       return copy;
-}
-#endif
-
 /*
  * flush helper (backend threaded)
  *
@@ -1205,7 +1242,8 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
        hammer2_dev_t *hmp;
-       int error = 0;
+       int flush_error = 0;
+       int fsync_error = 0;
        int total_error = 0;
        int j;
 
@@ -1260,6 +1298,9 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
         * independently, so the free block table can wind up being
         * ahead of the topology.  We depend on the bulk free scan
         * code to deal with any loose ends.
+        *
+        * vchain and fchain do not error on-lock since their data does
+        * not have to be re-read from media.
         */
        hammer2_chain_ref(&hmp->vchain);
        hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
@@ -1272,7 +1313,7 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
                 */
                hammer2_voldata_modify(hmp);
                chain = &hmp->fchain;
-               hammer2_flush(chain, HAMMER2_FLUSH_TOP);
+               flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
                KKASSERT(chain == &hmp->fchain);
        }
        hammer2_chain_unlock(&hmp->fchain);
@@ -1283,14 +1324,12 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
        hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
        if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
                chain = &hmp->vchain;
-               hammer2_flush(chain, HAMMER2_FLUSH_TOP);
+               flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
                KKASSERT(chain == &hmp->vchain);
        }
        hammer2_chain_unlock(&hmp->vchain);
        hammer2_chain_drop(&hmp->vchain);
 
-       error = 0;
-
        /*
         * We can't safely flush the volume header until we have
         * flushed any device buffers which have built up.
@@ -1298,10 +1337,12 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
         * XXX this isn't being incremental
         */
        vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
-       error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
+       fsync_error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
        vn_unlock(hmp->devvp);
-       if (error)
-               kprintf("error %d cannot sync %s\n", error, hmp->devrepname);
+       if (fsync_error || flush_error) {
+               kprintf("hammer2: sync error fsync=%d h2flush=0x%04x dev=%s\n",
+                       fsync_error, flush_error, hmp->devrepname);
+       }
 
        /*
         * The flush code sets CHAIN_VOLUMESYNC to indicate that the
@@ -1309,9 +1350,10 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
         *
         * XXX synchronize the flag & data with only this flush XXX
         */
-       if (error == 0 &&
+       if (fsync_error == 0 && flush_error == 0 &&
            (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
                struct buf *bp;
+               int vol_error = 0;
 
                /*
                 * Synchronize the disk before flushing the volume
@@ -1325,7 +1367,7 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
                bp->b_bio1.bio_done = biodone_sync;
                bp->b_bio1.bio_flags |= BIO_SYNC;
                vn_strategy(hmp->devvp, &bp->b_bio1);
-               biowait(&bp->b_bio1, "h2vol");
+               fsync_error = biowait(&bp->b_bio1, "h2vol");
                relpbuf(bp, NULL);
 
                /*
@@ -1349,13 +1391,17 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
                atomic_clear_int(&hmp->vchain.flags,
                                 HAMMER2_CHAIN_VOLUMESYNC);
                bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
-               bawrite(bp);
+               vol_error = bwrite(bp);
                hmp->volhdrno = j;
+               if (vol_error)
+                       fsync_error = vol_error;
        }
-       if (error)
-               total_error = error;
+       if (flush_error)
+               total_error = flush_error;
+       if (fsync_error)
+               total_error = hammer2_errno_to_error(fsync_error);
 
        hammer2_trans_done(hmp->spmp);  /* spmp trans */
 skip:
-       error = hammer2_xop_feed(&xop->head, NULL, thr->clindex, total_error);
+       hammer2_xop_feed(&xop->head, NULL, thr->clindex, total_error);
 }
index cd01fb2..aa4081d 100644 (file)
@@ -51,6 +51,7 @@ struct hammer2_fiterate {
        hammer2_off_t   bpref;
        hammer2_off_t   bnext;
        int             loops;
+       int             relaxed;
 };
 
 typedef struct hammer2_fiterate hammer2_fiterate_t;
@@ -80,10 +81,18 @@ hammer2_freemapradix(int radix)
  * chains use fixed storage offsets in the 4MB reserved area at the
  * beginning of each 2GB zone
  *
+ * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
+ *     which is on a 1GB boundary.  This will eat a little more space but for
+ *     now we retain compatibility and make FMZONEBASE every 1GB.
+ *
+ *     (see same thing in hammer2_bulkfree.c near the top, as well as in
+ *     newfs_hammer2).
+ *
  * Rotate between four possibilities.  Theoretically this means we have three
  * good freemaps in case of a crash which we can use as a base for the fixup
  * scan at mount-time.
  */
+#define H2FMZONEBASE(key)      ((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)
 #define H2FMBASE(key, radix)   ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
 #define H2FMSHIFT(radix)       ((hammer2_off_t)1 << (radix))
 
@@ -234,7 +243,6 @@ hammer2_freemap_alloc(hammer2_chain_t *chain, size_t bytes)
                 * area, not allocated from the freemap.
                 */
                error = hammer2_freemap_reserve(chain, radix);
-               KKASSERT(error == 0);
 
                return error;
        }
@@ -277,6 +285,7 @@ hammer2_freemap_alloc(hammer2_chain_t *chain, size_t bytes)
        KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_SIZE);
 
        iter.bpref = hmp->heur_freemap[hindex];
+       iter.relaxed = hmp->freemap_relaxed;
 
        /*
         * Make sure bpref is in-bounds.  It's ok if bpref covers a zone's
@@ -291,20 +300,19 @@ hammer2_freemap_alloc(hammer2_chain_t *chain, size_t bytes)
        parent = &hmp->fchain;
        hammer2_chain_ref(parent);
        hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
-       error = EAGAIN;
+       error = HAMMER2_ERROR_EAGAIN;
        iter.bnext = iter.bpref;
        iter.loops = 0;
 
-       while (error == EAGAIN) {
+       while (error == HAMMER2_ERROR_EAGAIN) {
                error = hammer2_freemap_try_alloc(&parent, bref, radix,
                                                  &iter, mtid);
        }
+       hmp->freemap_relaxed |= iter.relaxed;   /* heuristical, SMP race ok */
        hmp->heur_freemap[hindex] = iter.bnext;
        hammer2_chain_unlock(parent);
        hammer2_chain_drop(parent);
 
-       KKASSERT(error == 0);
-
        return (error);
 }
 
@@ -349,7 +357,6 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                                     &error,
                                     HAMMER2_LOOKUP_ALWAYS |
                                     HAMMER2_LOOKUP_MATCHIND);
-       error = hammer2_error_to_errno(error);
 
        if (chain == NULL) {
                /*
@@ -385,13 +392,13 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                kprintf("hammer2_freemap_try_alloc: %016jx: error %s\n",
                        (intmax_t)bref->data_off,
                        hammer2_error_str(chain->error));
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
        } else if ((chain->bref.check.freemap.bigmask &
                   ((size_t)1 << radix)) == 0) {
                /*
                 * Already flagged as not having enough space
                 */
-               error = ENOSPC;
+               error = HAMMER2_ERROR_ENOSPC;
        } else {
                /*
                 * Modify existing chain to setup for adjustment.
@@ -415,7 +422,7 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                KKASSERT(start >= 0 && start < HAMMER2_FREEMAP_COUNT);
                hammer2_chain_modify(chain, mtid, 0, 0);
 
-               error = ENOSPC;
+               error = HAMMER2_ERROR_ENOSPC;
                for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
                        int availchk;
 
@@ -425,7 +432,8 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                        }
 
                        /*
-                        * Calculate bmap pointer
+                        * Calculate bmap pointer from the starting index
+                        * forwards.
                         *
                         * NOTE: bmap pointer is invalid if n >= FREEMAP_COUNT.
                         */
@@ -443,21 +451,30 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                                availchk = 0;
                        }
 
+                       /*
+                        * Try to allocate from a matching freemap class
+                        * superblock.  If we are in relaxed mode we allocate
+                        * from any freemap class superblock.
+                        */
                        if (availchk &&
-                           (bmap->class == 0 || bmap->class == class)) {
+                           (bmap->class == 0 || bmap->class == class ||
+                            iter->relaxed)) {
                                base_key = key + n * l0size;
                                error = hammer2_bmap_alloc(hmp, bmap,
                                                           class, n,
                                                           (int)bref->key,
                                                           radix,
                                                           &base_key);
-                               if (error != ENOSPC) {
+                               if (error != HAMMER2_ERROR_ENOSPC) {
                                        key = base_key;
                                        break;
                                }
                        }
 
                        /*
+                        * Calculate bmap pointer from the starting index
+                        * backwards (locality).
+                        *
                         * Must recalculate after potentially having called
                         * hammer2_bmap_alloc() above in case chain was
                         * reallocated.
@@ -477,21 +494,32 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                                availchk = 0;
                        }
 
+                       /*
+                        * Try to allocate from a matching freemap class
+                        * superblock.  If we are in relaxed mode we allocate
+                        * from any freemap class superblock.
+                        */
                        if (availchk &&
-                           (bmap->class == 0 || bmap->class == class)) {
+                           (bmap->class == 0 || bmap->class == class ||
+                           iter->relaxed)) {
                                base_key = key + n * l0size;
                                error = hammer2_bmap_alloc(hmp, bmap,
                                                           class, n,
                                                           (int)bref->key,
                                                           radix,
                                                           &base_key);
-                               if (error != ENOSPC) {
+                               if (error != HAMMER2_ERROR_ENOSPC) {
                                        key = base_key;
                                        break;
                                }
                        }
                }
-               if (error == ENOSPC) {
+
+               /*
+                * We only know for sure that we can clear the bitmap bit
+                * if we scanned the entire array (start == 0).
+                */
+               if (error == HAMMER2_ERROR_ENOSPC && start == 0) {
                        chain->bref.check.freemap.bigmask &=
                                (uint32_t)~((size_t)1 << radix);
                }
@@ -526,7 +554,7 @@ hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
                        chain,
                        bref->key, bref->data_off, chain->bref.data_off);
 #endif
-       } else if (error == ENOSPC) {
+       } else if (error == HAMMER2_ERROR_ENOSPC) {
                /*
                 * Return EAGAIN with next iteration in iter->bnext, or
                 * return ENOSPC if the allocation map has been exhausted.
@@ -693,7 +721,7 @@ hammer2_bmap_alloc(hammer2_dev_t *hmp, hammer2_bmap_data_t *bmap,
                }
                /*fragments might remain*/
                /*KKASSERT(bmap->avail == 0);*/
-               return (ENOSPC);
+               return (HAMMER2_ERROR_ENOSPC);
 success:
                offset = i * (HAMMER2_SEGSIZE / HAMMER2_BMAP_ELEMENTS) +
                         (j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2));
@@ -783,11 +811,14 @@ success:
         * and available bytes, update the allocation offset (*basep)
         * from the L0 base to the actual offset.
         *
+        * Do not override the class if doing a relaxed class allocation.
+        *
         * avail must reflect the bitmap-granular availability.  The allocator
         * tests will also check the linear iterator.
         */
        bmap->bitmapq[i] |= bmmask;
-       bmap->class = class;
+       if (bmap->class == 0)
+               bmap->class = class;
        bmap->avail -= bgsize;
        *basep += offset;
 
@@ -807,6 +838,9 @@ success:
        return(0);
 }
 
+/*
+ * Initialize a freemap for the storage area (in bytes) that begins at (key).
+ */
 static
 void
 hammer2_freemap_init(hammer2_dev_t *hmp, hammer2_key_t key,
@@ -818,44 +852,58 @@ hammer2_freemap_init(hammer2_dev_t *hmp, hammer2_key_t key,
        hammer2_bmap_data_t *bmap;
        int count;
 
+       /*
+        * LEVEL1 is 1GB, there are two level1 1GB freemaps per 2GB zone.
+        */
        l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
 
        /*
-        * Calculate the portion of the 2GB map that should be initialized
+        * Calculate the portion of the 1GB map that should be initialized
         * as free.  Portions below or after will be initialized as allocated.
         * SEGMASK-align the areas so we don't have to worry about sub-scans
         * or endianess when using memset.
         *
-        * (1) Ensure that all statically allocated space from newfs_hammer2
-        *     is marked allocated.
-        *
-        * (2) Ensure that the reserved area is marked allocated (typically
-        *     the first 4MB of the 2GB area being represented).
-        *
-        * (3) Ensure that any trailing space at the end-of-volume is marked
-        *     allocated.
-        *
         * WARNING! It is possible for lokey to be larger than hikey if the
         *          entire 2GB segment is within the static allocation.
         */
+       /*
+        * (1) Ensure that all statically allocated space from newfs_hammer2
+        *     is marked allocated, and take it up to the level1 base for
+        *     this key.
+        */
        lokey = (hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
                ~HAMMER2_SEGMASK64;
+       if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
+               lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
 
-       if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
-                 HAMMER2_ZONE_SEG64) {
-               lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
-                       HAMMER2_ZONE_SEG64;
-       }
+       /*
+        * (2) Ensure that the reserved area is marked allocated (typically
+        *     the first 4MB of each 2GB area being represented).  Since
+        *     each LEAF represents 1GB of storage and the zone is 2GB, we
+        *     have to adjust lokey upward every other LEAF sequentially.
+        */
+       if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
+               lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
 
+       /*
+        * (3) Ensure that any trailing space at the end-of-volume is marked
+        *     allocated.
+        */
        hikey = key + H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
        if (hikey > hmp->voldata.volu_size) {
                hikey = hmp->voldata.volu_size & ~HAMMER2_SEGMASK64;
        }
 
+       /*
+        * Heuristic highest possible value
+        */
        chain->bref.check.freemap.avail =
                H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
        bmap = &chain->data->bmdata[0];
 
+       /*
+        * Initialize bitmap (bzero'd by caller)
+        */
        for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
                if (key < lokey || key >= hikey) {
                        memset(bmap->bitmapq, -1,
@@ -876,6 +924,11 @@ hammer2_freemap_init(hammer2_dev_t *hmp, hammer2_key_t key,
  * The current Level 1 freemap has been exhausted, iterate to the next
  * one, return ENOSPC if no freemaps remain.
  *
+ * At least two loops are required.  If we are not in relaxed mode and
+ * we run out of storage we enter relaxed mode and do a third loop.
+ * The relaxed mode is recorded back in the hmp so once we enter the mode
+ * we remain relaxed until stuff begins to get freed and only do 2 loops.
+ *
  * XXX this should rotate back to the beginning to handle freed-up space
  * XXX or use intermediate entries to locate free space. TODO
  */
@@ -889,10 +942,14 @@ hammer2_freemap_iterate(hammer2_chain_t **parentp, hammer2_chain_t **chainp,
        iter->bnext += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
        if (iter->bnext >= hmp->voldata.volu_size) {
                iter->bnext = 0;
-               if (++iter->loops == 2)
-                       return (ENOSPC);
+               if (++iter->loops >= 2) {
+                       if (iter->relaxed == 0)
+                               iter->relaxed = 1;
+                       else
+                               return (HAMMER2_ERROR_ENOSPC);
+               }
        }
-       return(EAGAIN);
+       return(HAMMER2_ERROR_EAGAIN);
 }
 
 /*
@@ -976,7 +1033,6 @@ hammer2_freemap_adjust(hammer2_dev_t *hmp, hammer2_blockref_t *bref,
                                     &error,
                                     HAMMER2_LOOKUP_ALWAYS |
                                     HAMMER2_LOOKUP_MATCHIND);
-       error = hammer2_error_to_errno(error);
 
        /*
         * Stop early if we are trying to free something but no leaf exists.
@@ -1016,7 +1072,8 @@ hammer2_freemap_adjust(hammer2_dev_t *hmp, hammer2_blockref_t *bref,
                }
 
                if (error == 0) {
-                       hammer2_chain_modify(chain, mtid, 0, 0);
+                       error = hammer2_chain_modify(chain, mtid, 0, 0);
+                       KKASSERT(error == 0);
                        bzero(&chain->data->bmdata[0],
                              HAMMER2_FREEMAP_LEVELN_PSIZE);
                        chain->bref.check.freemap.bigmask = (uint32_t)-1;
@@ -1178,9 +1235,14 @@ again:
         * be something allocatable.  We also set this in recovery... it
         * doesn't hurt and we might want to use the hint for other validation
         * operations later on.
+        *
+        * We could calculate the largest possible allocation and set the
+        * radii that could fit, but it's easier just to set bigmask to -1.
         */
-       if (modified)
-               chain->bref.check.freemap.bigmask |= 1 << radix;
+       if (modified) {
+               chain->bref.check.freemap.bigmask = -1;
+               hmp->freemap_relaxed = 0;       /* reset heuristic */
+       }
 
        hammer2_chain_unlock(chain);
        hammer2_chain_drop(chain);
index a86c6b5..190a539 100644 (file)
@@ -386,6 +386,8 @@ hammer2_inode_drop(hammer2_inode_t *ip)
  * Get the vnode associated with the given inode, allocating the vnode if
  * necessary.  The vnode will be returned exclusively locked.
  *
+ * *errorp is set to a UNIX error, not a HAMMER2 error.
+ *
  * The caller must lock the inode (shared or exclusive).
  *
  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
@@ -661,7 +663,9 @@ again:
  * entries), but note that this really only applies OBJTYPE_DIRECTORY as
  * non-directory inodes can be hardlinked.
  *
- * If no error occurs the new inode with its cluster locked is returned.
+ * If no error occurs the new inode is returned, otherwise NULL is returned.
+ * It is possible for an error to create a junk inode and then fail later.
+ * It will attempt to delete the junk inode and return NULL in this situation.
  *
  * If vap and/or cred are NULL the related fields are not set and the
  * inode type defaults to a directory.  This is used when creating PFSs
@@ -740,13 +744,13 @@ hammer2_inode_create(hammer2_inode_t *dip, hammer2_inode_t *pip,
                hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
 
                if (error) {
-                       if (error != ENOENT)
+                       if (error != HAMMER2_ERROR_ENOENT)
                                goto done2;
                        ++lhc;
                        error = 0;
                }
                if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
-                       error = ENOSPC;
+                       error = HAMMER2_ERROR_ENOSPC;
                        goto done2;
                }
        }
@@ -871,6 +875,8 @@ done2:
 /*
  * Create a directory entry under dip with the specified name, inode number,
  * and OBJTYPE (type).
+ *
+ * This returns a UNIX errno code, not a HAMMER2_ERROR_* code.
  */
 int
 hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
@@ -918,13 +924,13 @@ hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
                hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
 
                if (error) {
-                       if (error != ENOENT)
+                       if (error != HAMMER2_ERROR_ENOENT)
                                goto done2;
                        ++lhc;
                        error = 0;
                }
                if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
-                       error = ENOSPC;
+                       error = HAMMER2_ERROR_ENOSPC;
                        goto done2;
                }
        }
@@ -948,6 +954,7 @@ hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
 
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
 done2:
+       error = hammer2_error_to_errno(error);
        hammer2_inode_unlock(dip);
 
        return error;
@@ -1159,7 +1166,8 @@ killit:
                error = hammer2_xop_collect(&xop->head, 0);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        }
-       error = 0;
+       error = 0;      /* XXX */
+
        return error;
 }
 
@@ -1221,7 +1229,7 @@ hammer2_inode_chain_sync(hammer2_inode_t *ip)
                hammer2_xop_start(&xop->head, hammer2_inode_xop_chain_sync);
                error = hammer2_xop_collect(&xop->head, 0);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
-               if (error == ENOENT)
+               if (error == HAMMER2_ERROR_ENOENT)
                        error = 0;
                if (error) {
                        kprintf("hammer2: unable to fsync inode %p\n", ip);
@@ -1277,6 +1285,7 @@ hammer2_inode_run_sideq(hammer2_pfs_t *pmp)
                        hammer2_xop_start(&xop->head,
                                          hammer2_inode_xop_destroy);
                        error = hammer2_xop_collect(&xop->head, 0);
+                       /* XXX error handling */
                        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                } else {
                        /*
@@ -1315,16 +1324,15 @@ hammer2_inode_xop_mkdirent(hammer2_thread_t *thr, hammer2_xop_t *arg)
        parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                chain = NULL;
                goto fail;
        }
        chain = hammer2_chain_lookup(&parent, &key_next,
                                     xop->lhc, xop->lhc,
                                     &error, 0);
-       error = hammer2_error_to_errno(error);
        if (chain) {
-               error = EEXIST;
+               error = HAMMER2_ERROR_EEXIST;
                goto fail;
        }
 
@@ -1349,15 +1357,16 @@ hammer2_inode_xop_mkdirent(hammer2_thread_t *thr, hammer2_xop_t *arg)
                 *          do not use sizeof(chain->data->buf), which
                 *          will be much larger.
                 */
-               hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
-
-               chain->bref.embed.dirent = xop->dirent;
-               if (xop->dirent.namlen <= sizeof(chain->bref.check.buf))
-                       bcopy(xop->head.name1, chain->bref.check.buf,
-                             xop->dirent.namlen);
-               else
-                       bcopy(xop->head.name1, chain->data->buf,
-                             xop->dirent.namlen);
+               error = hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
+               if (error == 0) {
+                       chain->bref.embed.dirent = xop->dirent;
+                       if (xop->dirent.namlen <= sizeof(chain->bref.check.buf))
+                               bcopy(xop->head.name1, chain->bref.check.buf,
+                                     xop->dirent.namlen);
+                       else
+                               bcopy(xop->head.name1, chain->data->buf,
+                                     xop->dirent.namlen);
+               }
        }
 fail:
        if (parent) {
@@ -1397,16 +1406,15 @@ hammer2_inode_xop_create(hammer2_thread_t *thr, hammer2_xop_t *arg)
        parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                chain = NULL;
                goto fail;
        }
        chain = hammer2_chain_lookup(&parent, &key_next,
                                     xop->lhc, xop->lhc,
                                     &error, 0);
-       error = hammer2_error_to_errno(error);
        if (chain) {
-               error = EEXIST;
+               error = HAMMER2_ERROR_EEXIST;
                goto fail;
        }
 
@@ -1417,15 +1425,18 @@ hammer2_inode_xop_create(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                     HAMMER2_INODE_BYTES,
                                     xop->head.mtid, 0, xop->flags);
        if (error == 0) {
-               hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
-               chain->data->ipdata.meta = xop->meta;
-               if (xop->head.name1) {
-                       bcopy(xop->head.name1,
-                             chain->data->ipdata.filename,
-                             xop->head.name1_len);
-                       chain->data->ipdata.meta.name_len = xop->head.name1_len;
+               error = hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
+               if (error == 0) {
+                       chain->data->ipdata.meta = xop->meta;
+                       if (xop->head.name1) {
+                               bcopy(xop->head.name1,
+                                     chain->data->ipdata.filename,
+                                     xop->head.name1_len);
+                               chain->data->ipdata.meta.name_len =
+                                       xop->head.name1_len;
+                       }
+                       chain->data->ipdata.meta.name_key = xop->lhc;
                }
-               chain->data->ipdata.meta.name_key = xop->lhc;
        }
 fail:
        if (parent) {
@@ -1463,12 +1474,12 @@ hammer2_inode_xop_destroy(hammer2_thread_t *thr, hammer2_xop_t *arg)
        chain = hammer2_inode_chain(ip, thr->clindex, HAMMER2_RESOLVE_ALWAYS);
        if (chain == NULL) {
                parent = NULL;
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
        parent = hammer2_chain_getparent(chain, HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
        KKASSERT(chain->parent == parent);
@@ -1512,7 +1523,6 @@ hammer2_inode_xop_unlinkall(hammer2_thread_t *thr, hammer2_xop_t *arg)
        chain = hammer2_chain_lookup(&parent, &key_next,
                                     xop->key_beg, xop->key_end,
                                     &error, HAMMER2_LOOKUP_ALWAYS);
-       error = hammer2_error_to_errno(error);
        while (chain) {
                hammer2_chain_delete(parent, chain,
                                     xop->head.mtid, HAMMER2_DELETE_PERMANENT);
@@ -1523,10 +1533,9 @@ hammer2_inode_xop_unlinkall(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                           &error,
                                           HAMMER2_LOOKUP_ALWAYS);
        }
-       error = hammer2_error_to_errno(error);
 done:
        if (error == 0)
-               error = ENOENT;
+               error = HAMMER2_ERROR_ENOENT;
        hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);
        if (parent) {
                hammer2_chain_unlock(parent);
@@ -1558,18 +1567,17 @@ hammer2_inode_xop_connect(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
                chain = NULL;
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto fail;
        }
        chain = hammer2_chain_lookup(&parent, &key_dummy,
                                     xop->lhc, xop->lhc,
                                     &error, 0);
-       error = hammer2_error_to_errno(error);
        if (chain) {
                hammer2_chain_unlock(chain);
                hammer2_chain_drop(chain);
                chain = NULL;
-               error = EEXIST;
+               error = HAMMER2_ERROR_EEXIST;
                goto fail;
        }
        if (error)
@@ -1583,7 +1591,10 @@ hammer2_inode_xop_connect(hammer2_thread_t *thr, hammer2_xop_t *arg)
         */
        chain = hammer2_inode_chain(xop->head.ip2, thr->clindex,
                                    HAMMER2_RESOLVE_ALWAYS);
-       hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
+       error = hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
+       if (error)
+               goto fail;
+
        wipdata = &chain->data->ipdata;
 
        hammer2_inode_modify(xop->head.ip2);
@@ -1634,7 +1645,7 @@ hammer2_inode_xop_chain_sync(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                     HAMMER2_RESOLVE_ALWAYS);
        chain = NULL;
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
        if (parent->error) {
@@ -1682,7 +1693,6 @@ hammer2_inode_xop_chain_sync(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                                   HAMMER2_LOOKUP_NODATA |
                                                   HAMMER2_LOOKUP_NODIRECT);
                }
-               error = hammer2_error_to_errno(error);
 
                /*
                 * Reset to point at inode for following code, if necessary.
@@ -1702,11 +1712,15 @@ hammer2_inode_xop_chain_sync(hammer2_thread_t *thr, hammer2_xop_t *arg)
         * Sync the inode meta-data, potentially clear the blockset area
         * of direct data so it can be used for blockrefs.
         */
-       hammer2_chain_modify(parent, xop->head.mtid, 0, 0);
-       parent->data->ipdata.meta = xop->meta;
-       if (xop->clear_directdata) {
-               bzero(&parent->data->ipdata.u.blockset,
-                     sizeof(parent->data->ipdata.u.blockset));
+       if (error == 0) {
+               error = hammer2_chain_modify(parent, xop->head.mtid, 0, 0);
+               if (error == 0) {
+                       parent->data->ipdata.meta = xop->meta;
+                       if (xop->clear_directdata) {
+                               bzero(&parent->data->ipdata.u.blockset,
+                                     sizeof(parent->data->ipdata.u.blockset));
+                       }
+               }
        }
 done:
        if (chain) {
index f58583e..d226821 100644 (file)
@@ -341,7 +341,6 @@ hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state)
                                               &error, 0);
        }
        hammer2_inode_unlock(spmp->iroot);
-       error = hammer2_error_to_errno(error);
        /* XXX do something with error */
 done:
        if (chain) {
index 5031608..06687b4 100644 (file)
@@ -771,7 +771,7 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
 
        hammer2_trans_done(spmp);
 
-       return (error);
+       return (hammer2_error_to_errno(error));
 }
 
 static int
@@ -817,7 +817,7 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
 
        lockmgr(&hmp->bulklk, LK_RELEASE);
 
-       return (error);
+       return (hammer2_error_to_errno(error));
 }
 
 /*
@@ -856,7 +856,7 @@ hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data)
        ino->kdata = ip;
        hammer2_inode_unlock(ip);
 
-       return error;
+       return hammer2_error_to_errno(error);
 }
 
 /*
@@ -903,7 +903,7 @@ hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data)
        hammer2_inode_unlock(ip);
        hammer2_trans_done(ip->pmp);
 
-       return (error);
+       return (hammer2_error_to_errno(error));
 }
 
 static
@@ -939,6 +939,7 @@ hammer2_ioctl_bulkfree_scan(hammer2_inode_t *ip, void *data)
        hammer2_pfs_t   *pmp;
        hammer2_chain_t *vchain;
        int error;
+       int didsnap;
 
        pmp = ip->pmp;
        ip = pmp->iroot;
@@ -961,25 +962,55 @@ hammer2_ioctl_bulkfree_scan(hammer2_inode_t *ip, void *data)
         * sync the filesystem and obtain a snapshot of the synchronized
         * hmp volume header.  We treat the snapshot as an independent
         * entity.
+        *
+        * If ENOSPC occurs we should continue, because bulkfree is the only
+        * way to fix that.  The flush will have flushed everything it could
+        * and not left any modified chains.  Otherwise an error is fatal.
         */
-       hammer2_vfs_sync(pmp->mp, MNT_WAIT);
-       vchain = hammer2_chain_bulksnap(hmp);
+       error = hammer2_vfs_sync(pmp->mp, MNT_WAIT);
+       if (error && error != ENOSPC)
+               goto failed;
+
+       /*
+        * If we have an ENOSPC error we have to bulkfree on the live
+        * topology.  Otherwise we can bulkfree on a snapshot.
+        */
+       if (error) {
+               kprintf("hammer2: WARNING! Bulkfree forced to use live "
+                       "topology\n");
+               vchain = &hmp->vchain;
+               hammer2_chain_ref(vchain);
+               didsnap = 0;
+       } else {
+               vchain = hammer2_chain_bulksnap(hmp);
+               didsnap = 1;
+       }
 
        /*
-        * Normal filesystem operations will not interfere with the
-        * synchronized block hierarchy and can run concurrent with the
-        * bulkfree pass.
+        * Bulkfree on a snapshot does not need a transaction, which allows
+        * it to run concurrently with any operation other than another
+        * bulkfree.
+        *
+        * If we are running bulkfree on the live topology we have to be
+        * in a FLUSH transaction.
         */
-       hammer2_trans_init(pmp, 0);
+       if (didsnap == 0)
+               hammer2_trans_init(pmp, HAMMER2_TRANS_ISFLUSH);
+
        if (bfi) {
                hammer2_thr_freeze(&hmp->bfthr);
                error = hammer2_bulkfree_pass(hmp, vchain, bfi);
                hammer2_thr_unfreeze(&hmp->bfthr);
        }
-       hammer2_chain_bulkdrop(vchain);
-       hammer2_trans_done(pmp);
+       if (didsnap) {
+               hammer2_chain_bulkdrop(vchain);
+       } else {
+               hammer2_chain_drop(vchain);
+               hammer2_trans_done(pmp);
+       }
+       error = hammer2_error_to_errno(error);
 
+failed:
        lockmgr(&hmp->bflock, LK_RELEASE);
-
-       return (hammer2_error_to_errno(error));
+       return error;
 }
index a1a7e2e..85eb6d9 100644 (file)
@@ -324,9 +324,8 @@ hammer2_strategy_xop_read(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                             HAMMER2_LOOKUP_SHARED);
                if (chain)
                        error = chain->error;
-               error = hammer2_error_to_errno(error);
        } else {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                chain = NULL;
        }
        error = hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
@@ -383,7 +382,7 @@ hammer2_strategy_xop_read(hammer2_thread_t *thr, hammer2_xop_t *arg)
                biodone(bio);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                break;
-       case ENOENT:
+       case HAMMER2_ERROR_ENOENT:
                xop->finished = 1;
                hammer2_mtx_unlock(&xop->lock);
                bp->b_flags |= B_NOTMETA;
@@ -393,11 +392,11 @@ hammer2_strategy_xop_read(hammer2_thread_t *thr, hammer2_xop_t *arg)
                biodone(bio);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                break;
-       case EINPROGRESS:
+       case HAMMER2_ERROR_EINPROGRESS:
                hammer2_mtx_unlock(&xop->lock);
                break;
        default:
-               kprintf("strategy_xop_read: error %d loff=%016jx\n",
+               kprintf("strategy_xop_read: error %08x loff=%016jx\n",
                        error, bp->b_loffset);
                xop->finished = 1;
                hammer2_mtx_unlock(&xop->lock);
@@ -600,7 +599,6 @@ hammer2_strategy_xop_write(hammer2_thread_t *thr, hammer2_xop_t *arg)
        hammer2_write_file_core(bio_data, ip, &parent,
                                lbase, IO_ASYNC, pblksize,
                                xop->head.mtid, &error);
-       error = hammer2_error_to_errno(error);
        if (parent) {
                hammer2_chain_unlock(parent);
                hammer2_chain_drop(parent);
@@ -630,7 +628,7 @@ hammer2_strategy_xop_write(hammer2_thread_t *thr, hammer2_xop_t *arg)
         */
        error = hammer2_xop_collect(&xop->head, HAMMER2_XOP_COLLECT_NOWAIT);
 
-       if (error == EINPROGRESS) {
+       if (error == HAMMER2_ERROR_EINPROGRESS) {
                hammer2_mtx_unlock(&xop->lock);
                return;
        }
@@ -644,7 +642,7 @@ hammer2_strategy_xop_write(hammer2_thread_t *thr, hammer2_xop_t *arg)
        bio = xop->bio;         /* now owned by us */
        bp = bio->bio_buf;      /* now owned by us */
 
-       if (error == ENOENT || error == 0) {
+       if (error == HAMMER2_ERROR_ENOENT || error == 0) {
                bp->b_flags |= B_NOTMETA;
                bp->b_resid = 0;
                bp->b_error = 0;
@@ -672,10 +670,14 @@ hammer2_bioq_sync(hammer2_pfs_t *pmp)
 }
 
 /* 
- * Create a new cluster at (cparent, lbase) and assign physical storage,
- * returning a cluster suitable for I/O.  The cluster will be in a modified
- * state.  Any chain->error will be rolled up into *errorp, but still
- * returned.  Caller must check *errorp.  Caller need not check chain->error.
+ * Assign physical storage at (cparent, lbase), returning a suitable chain
+ * and setting *errorp appropriately.
+ *
+ * If no error occurs, the returned chain will be in a modified state.
+ *
+ * If an error occurs, the returned chain may or may not be NULL.  If
+ * not-null any chain->error (if not 0) will also be rolled up into *errorp.
+ * So the caller only needs to test *errorp.
  *
  * cparent can wind up being anything.
  *
@@ -703,7 +705,7 @@ hammer2_assign_physical(hammer2_inode_t *ip, hammer2_chain_t **parentp,
         * logical buffer cache buffer.
         */
        KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
-retry:
+
        chain = hammer2_chain_lookup(parentp, &key_dummy,
                                     lbase, lbase,
                                     errorp,
@@ -732,19 +734,16 @@ retry:
                 */
                dedup_off = hammer2_dedup_lookup((*parentp)->hmp, datap,
                                                 pblksize);
-               *errorp = hammer2_chain_create(parentp, &chain,
-                                              ip->pmp,
+               *errorp |= hammer2_chain_create(parentp, &chain,
+                                               ip->pmp,
                                       HAMMER2_ENC_CHECK(ip->meta.check_algo) |
                                       HAMMER2_ENC_COMP(HAMMER2_COMP_NONE),
-                                              lbase, HAMMER2_PBUFRADIX,
-                                              HAMMER2_BREF_TYPE_DATA,
-                                              pblksize, mtid,
-                                              dedup_off, 0);
-               if (chain == NULL) {
-                       panic("hammer2_chain_create: par=%p error=%d\n",
-                             *parentp, *errorp);
-                       goto retry;
-               }
+                                               lbase, HAMMER2_PBUFRADIX,
+                                               HAMMER2_BREF_TYPE_DATA,
+                                               pblksize, mtid,
+                                               dedup_off, 0);
+               if (chain == NULL)
+                       goto failed;
                /*ip->delta_dcount += pblksize;*/
        } else if (chain->error == 0) {
                switch (chain->bref.type) {
@@ -753,16 +752,18 @@ retry:
                         * The data is embedded in the inode, which requires
                         * a bit more finess.
                         */
-                       hammer2_chain_modify_ip(ip, chain, mtid, 0);
+                       *errorp |= hammer2_chain_modify_ip(ip, chain, mtid, 0);
                        break;
                case HAMMER2_BREF_TYPE_DATA:
                        dedup_off = hammer2_dedup_lookup(chain->hmp, datap,
                                                         pblksize);
                        if (chain->bytes != pblksize) {
-                               hammer2_chain_resize(chain,
+                               *errorp |= hammer2_chain_resize(chain,
                                                     mtid, dedup_off,
                                                     pradix,
                                                     HAMMER2_MODIFY_OPTDATA);
+                               if (*errorp)
+                                       break;
                        }
 
                        /*
@@ -772,8 +773,8 @@ retry:
                         * after resizing in case this is an encrypted or
                         * compressed buffer.
                         */
-                       hammer2_chain_modify(chain, mtid, dedup_off,
-                                            HAMMER2_MODIFY_OPTDATA);
+                       *errorp |= hammer2_chain_modify(chain, mtid, dedup_off,
+                                                       HAMMER2_MODIFY_OPTDATA);
                        break;
                default:
                        panic("hammer2_assign_physical: bad type");
@@ -783,6 +784,7 @@ retry:
        } else {
                *errorp = chain->error;
        }
+failed:
        return (chain);
 }
 
@@ -1028,20 +1030,20 @@ hammer2_compress_and_write(char *data, hammer2_inode_t *ip,
                                        mtid, &bdata, errorp);
 
        if (*errorp) {
-               kprintf("WRITE PATH: An error occurred while "
-                       "assigning physical space.\n");
-               KKASSERT(chain == NULL);
                goto done;
        }
 
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                hammer2_inode_data_t *wipdata;
 
-               hammer2_chain_modify_ip(ip, chain, mtid, 0);
-               wipdata = &chain->data->ipdata;
-               KKASSERT(wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA);
-               bcopy(data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
-               ++hammer2_iod_file_wembed;
+               *errorp = hammer2_chain_modify_ip(ip, chain, mtid, 0);
+               if (*errorp == 0) {
+                       wipdata = &chain->data->ipdata;
+                       KKASSERT(wipdata->meta.op_flags &
+                                HAMMER2_OPFLAG_DIRECTDATA);
+                       bcopy(data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
+                       ++hammer2_iod_file_wembed;
+               }
        } else if (bdata == NULL) {
                /*
                 * Live deduplication, a copy of the data is already present
@@ -1248,13 +1250,19 @@ zero_write(char *data, hammer2_inode_t *ip,
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        hammer2_inode_data_t *wipdata;
 
-                       hammer2_chain_modify_ip(ip, chain, mtid, 0);
-                       wipdata = &chain->data->ipdata;
-                       KKASSERT(wipdata->meta.op_flags &
-                                HAMMER2_OPFLAG_DIRECTDATA);
-                       bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
-                       ++hammer2_iod_file_wembed;
+                       if (*errorp == 0) {
+                               *errorp = hammer2_chain_modify_ip(ip, chain,
+                                                                 mtid, 0);
+                       }
+                       if (*errorp == 0) {
+                               wipdata = &chain->data->ipdata;
+                               KKASSERT(wipdata->meta.op_flags &
+                                        HAMMER2_OPFLAG_DIRECTDATA);
+                               bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
+                               ++hammer2_iod_file_wembed;
+                       }
                } else {
+                       /* chain->error ok for deletion */
                        hammer2_chain_delete(*parentp, chain,
                                             mtid, HAMMER2_DELETE_PERMANENT);
                        ++hammer2_iod_file_wzero;
@@ -1352,7 +1360,6 @@ hammer2_write_bp(hammer2_chain_t *chain, char *data, int ioflag,
                error = 0;
                break;
        }
-       KKASSERT(error == 0);   /* XXX TODO */
        *errorp = error;
 }
 
index b85b712..c67f9cd 100644 (file)
@@ -423,24 +423,43 @@ hammer2_signal_check(time_t *timep)
 const char *
 hammer2_error_str(int error)
 {
-       const char *str;
-
-       switch(error) {
-       case HAMMER2_ERROR_NONE:
-               str = "0";
-               break;
-       case HAMMER2_ERROR_IO:
-               str = "I/O";
-               break;
-       case HAMMER2_ERROR_CHECK:
-               str = "check/crc";
-               break;
-       case HAMMER2_ERROR_INCOMPLETE:
-               str = "incomplete-node";
-               break;
-       default:
-               str = "unknown";
-               break;
-       }
-       return (str);
+       if (error & HAMMER2_ERROR_EIO)
+               return("I/O Error");
+       if (error & HAMMER2_ERROR_CHECK)
+               return("Check Error");
+       if (error & HAMMER2_ERROR_INCOMPLETE)
+               return("Cluster Quorum Error");
+       if (error & HAMMER2_ERROR_DEPTH)
+               return("Chain Depth Error");
+       if (error & HAMMER2_ERROR_BADBREF)
+               return("Bad Blockref Error");
+       if (error & HAMMER2_ERROR_ENOSPC)
+               return("No Space on Device");
+       if (error & HAMMER2_ERROR_ENOENT)
+               return("Entry Not Found");
+       if (error & HAMMER2_ERROR_ENOTEMPTY)
+               return("Directory Not Empty");
+       if (error & HAMMER2_ERROR_EAGAIN)
+               return("EAGAIN");
+       if (error & HAMMER2_ERROR_ENOTDIR)
+               return("Not a Directory");
+       if (error & HAMMER2_ERROR_EISDIR)
+               return("Is a Directory");
+       if (error & HAMMER2_ERROR_EINPROGRESS)
+               return("Operation in Progress");
+       if (error & HAMMER2_ERROR_ABORTED)
+               return("Operation Aborted");
+       if (error & HAMMER2_ERROR_EOF)
+               return("Operation Complete");
+       if (error & HAMMER2_ERROR_EINVAL)
+               return("Invalid Operation");
+       if (error & HAMMER2_ERROR_EEXIST)
+               return("Object Exists");
+       if (error & HAMMER2_ERROR_EDEADLK)
+               return("Deadlock Detected");
+       if (error & HAMMER2_ERROR_ESRCH)
+               return("Object Not Found");
+       if (error & HAMMER2_ERROR_ETIMEDOUT)
+               return("Timeout");
+       return("Unknown Error");
 }
index 2b043d5..f04f26f 100644 (file)
@@ -186,7 +186,7 @@ hammer2_primary_sync_thread(void *arg)
                                kprintf("sync_slaves error %d defer %p\n",
                                        error, list.base);
                        }
-                       if (error != EAGAIN)
+                       if (error != HAMMER2_ERROR_EAGAIN)
                                break;
                        while ((defer = list.base) != NULL) {
                                hammer2_inode_t *nip;
@@ -194,8 +194,11 @@ hammer2_primary_sync_thread(void *arg)
                                nip = defer->ip;
                                error = hammer2_sync_slaves(thr, nip, &list,
                                                        (nip == pmp->iroot));
-                               if (error && error != EAGAIN && error != ENOENT)
+                               if (error &&
+                                   error != HAMMER2_ERROR_EAGAIN &&
+                                   error != HAMMER2_ERROR_ENOENT) {
                                        break;
+                               }
                                if (hammer2_thr_break(thr)) {
                                        didbreak = 1;
                                        break;
@@ -220,7 +223,8 @@ hammer2_primary_sync_thread(void *arg)
                         * If the thread is being remastered, frozen, or
                         * stopped, clean up any left-over deferals.
                         */
-                       if (didbreak || (error && error != EAGAIN)) {
+                       if (didbreak ||
+                           (error && error != HAMMER2_ERROR_EAGAIN)) {
                                kprintf("didbreak\n");
                                while ((defer = list.base) != NULL) {
                                        --list.count;
@@ -228,8 +232,8 @@ hammer2_primary_sync_thread(void *arg)
                                        list.base = defer->next;
                                        kfree(defer, M_HAMMER2);
                                }
-                               if (error == 0 || error == EAGAIN)
-                                       error = EINPROGRESS;
+                               if (error == 0 || error == HAMMER2_ERROR_EAGAIN)
+                                       error = HAMMER2_ERROR_EINPROGRESS;
                                break;
                        }
                }
@@ -237,7 +241,7 @@ hammer2_primary_sync_thread(void *arg)
                hammer2_inode_drop(pmp->iroot);
                hammer2_trans_done(pmp);
 
-               if (error && error != EINPROGRESS)
+               if (error && error != HAMMER2_ERROR_EINPROGRESS)
                        kprintf("hammer2_sync_slaves: error %d\n", error);
 
                /*
@@ -458,7 +462,6 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
                                     HAMMER2_LOOKUP_SHARED |
                                     HAMMER2_LOOKUP_NODIRECT |
                                     HAMMER2_LOOKUP_NODATA);
-       serror = hammer2_error_to_errno(serror);
        merror = hammer2_xop_collect(&xop->head, 0);
        if (hammer2_debug & 0x8000) {
                kprintf("START_SCAN IP=%016jx chain=%p (%016jx)\n",
@@ -477,15 +480,15 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
                int dodefer = 0;
                hammer2_chain_t *focus;
 
-               if (chain == NULL && merror == ENOENT)
+               if (chain == NULL && merror == HAMMER2_ERROR_ENOENT)
                        break;
-               if (merror && merror != ENOENT)
+               if (merror && merror != HAMMER2_ERROR_ENOENT)
                        break;
 
                /*
                 * Compare
                 */
-               if (chain && merror == ENOENT) {
+               if (chain && merror == HAMMER2_ERROR_ENOENT) {
                        /*
                         * If we have local chains but the XOP scan is done,
                         * the chains need to be deleted.
@@ -614,7 +617,7 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
                /*
                 * If at least one deferral was added and the deferral
                 * list has grown too large, stop adding more.  This
-                * will trigger an EAGAIN return.
+                * will trigger a HAMMER2_ERROR_EAGAIN return.
                 */
                if (needrescan && list->count > 1000)
                        break;
@@ -632,7 +635,6 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
                                                   HAMMER2_LOOKUP_SHARED |
                                                   HAMMER2_LOOKUP_NODIRECT |
                                                   HAMMER2_LOOKUP_NODATA);
-                       serror = hammer2_error_to_errno(serror);
                }
        }
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
@@ -652,8 +654,8 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
         * NOTE: In this situation we do not yet want to synchronize our
         *       inode, setting the error code also has that effect.
         */
-       if ((merror == 0 || merror == ENOENT) && needrescan)
-               merror = EAGAIN;
+       if ((merror == 0 || merror == HAMMER2_ERROR_ENOENT) && needrescan)
+               merror = HAMMER2_ERROR_EAGAIN;
 
        /*
         * If no error occurred we can synchronize the inode meta-data
@@ -661,7 +663,7 @@ hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
         *
         * XXX inode lock was lost
         */
-       if (merror == 0 || merror == ENOENT) {
+       if (merror == 0 || merror == HAMMER2_ERROR_ENOENT) {
                hammer2_xop_ipcluster_t *xop2;
                hammer2_chain_t *focus;
 
@@ -759,7 +761,9 @@ hammer2_sync_insert(hammer2_thread_t *thr,
                                     focus->bref.type, focus->bytes,
                                     mtid, 0, 0);
        if (error == 0) {
-               hammer2_chain_modify(chain, mtid, 0, 0);
+               error = hammer2_chain_modify(chain, mtid, 0, 0);
+               if (error)
+                       goto failed;
 
                /*
                 * Copy focus to new chain
@@ -810,6 +814,7 @@ hammer2_sync_insert(hammer2_thread_t *thr,
                }
        }
 
+failed:
        if (chain)
                hammer2_chain_unlock(chain);    /* unlock, leave ref */
        *chainp = chain;                        /* will be returned locked */
@@ -902,6 +907,7 @@ hammer2_sync_replace(hammer2_thread_t *thr,
 {
        uint8_t otype;
        int nradix;
+       int error;
 
 #if HAMMER2_SYNCHRO_DEBUG
        if (hammer2_debug & 1)
@@ -912,13 +918,18 @@ hammer2_sync_replace(hammer2_thread_t *thr,
 #endif
        hammer2_chain_unlock(chain);
        hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
-       if (chain->error == 0) {
+       error = chain->error;
+       if (error == 0) {
                if (chain->bytes != focus->bytes) {
                        /* XXX what if compressed? */
                        nradix = hammer2_getradix(chain->bytes);
-                       hammer2_chain_resize(chain, mtid, 0, nradix, 0);
+                       error = hammer2_chain_resize(chain, mtid, 0, nradix, 0);
+                       if (error)
+                               goto failed;
                }
-               hammer2_chain_modify(chain, mtid, 0, 0);
+               error = hammer2_chain_modify(chain, mtid, 0, 0);
+               if (error)
+                       goto failed;
                otype = chain->bref.type;
                chain->bref.type = focus->bref.type;
                chain->bref.methods = focus->bref.methods;
@@ -1031,9 +1042,10 @@ hammer2_sync_replace(hammer2_thread_t *thr,
                }
        }
 
+failed:
        hammer2_chain_unlock(chain);
        hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED |
                                  HAMMER2_RESOLVE_MAYBE);
 
-       return 0;
+       return error;
 }
index 40f6c60..004ae86 100644 (file)
@@ -1940,6 +1940,16 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
                                        mp->mnt_vstat.f_bsize;
                mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
 
+               if (cred && cred->cr_uid != 0) {
+                       uint64_t adj;
+
+                       /* hide the 5% free reserve from non-root users */
+                       adj = hmp->free_reserved / mp->mnt_vstat.f_bsize;
+                       mp->mnt_stat.f_blocks -= adj;
+                       mp->mnt_stat.f_bfree -= adj;
+                       mp->mnt_stat.f_bavail -= adj;
+               }
+
                *sbp = mp->mnt_stat;
        }
        return (0);
@@ -1984,6 +1994,16 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
                                        mp->mnt_vstat.f_bsize;
                mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;
 
+               if (cred && cred->cr_uid != 0) {
+                       uint64_t adj;
+
+                       /* hide the 5% free reserve from non-root users */
+                       adj = hmp->free_reserved / mp->mnt_vstat.f_bsize;
+                       mp->mnt_vstat.f_blocks -= adj;
+                       mp->mnt_vstat.f_bfree -= adj;
+                       mp->mnt_vstat.f_bavail -= adj;
+               }
+
                *sbp = mp->mnt_vstat;
        }
        return (0);
@@ -2299,8 +2319,10 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
                error = hammer2_xop_collect(&xop->head,
                                            HAMMER2_XOP_COLLECT_WAITALL);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
-               if (error == ENOENT)
+               if (error == HAMMER2_ERROR_ENOENT)
                        error = 0;
+               else
+                       error = hammer2_error_to_errno(error);
        } else {
                error = 0;
        }
@@ -2524,6 +2546,7 @@ hammer2_install_volume_header(hammer2_dev_t *hmp)
        }
        if (valid) {
                hmp->volsync = hmp->voldata;
+               hmp->free_reserved = hmp->voldata.allocator_size / 20;
                error = 0;
                if (error_reported || bootverbose || 1) { /* 1/DEBUG */
                        kprintf("hammer2: using volume header #%d\n",
@@ -2684,6 +2707,65 @@ hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
                wakeup(&pmp->inmem_dirty_chains);
 }
 
+/*
+ * Returns 0 if the filesystem has tons of free space
+ * Returns 1 if the filesystem has less than 10% remaining
+ * Returns 2 if the filesystem has less than 5%/2.5% (user/root) remaining.
+ */
+int
+hammer2_vfs_enospace(hammer2_inode_t *ip, off_t bytes, struct ucred *cred)
+{
+       hammer2_pfs_t *pmp;
+       hammer2_dev_t *hmp;
+       hammer2_off_t free_reserved;
+       hammer2_off_t free_nominal;
+       int i;
+
+       pmp = ip->pmp;
+
+       if (pmp->free_ticks == 0 || pmp->free_ticks != ticks) {
+               free_reserved = HAMMER2_SEGSIZE;
+               free_nominal = 0x7FFFFFFFFFFFFFFFLLU;
+               for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
+                       hmp = pmp->pfs_hmps[i];
+                       if (hmp == NULL)
+                               continue;
+                       if (pmp->pfs_types[i] != HAMMER2_PFSTYPE_MASTER &&
+                           pmp->pfs_types[i] != HAMMER2_PFSTYPE_SOFT_MASTER)
+                               continue;
+
+                       if (free_nominal > hmp->voldata.allocator_free)
+                               free_nominal = hmp->voldata.allocator_free;
+                       if (free_reserved < hmp->free_reserved)
+                               free_reserved = hmp->free_reserved;
+               }
+
+               /*
+                * SMP races ok
+                */
+               pmp->free_reserved = free_reserved;
+               pmp->free_nominal = free_nominal;
+               pmp->free_ticks = ticks;
+       } else {
+               free_reserved = pmp->free_reserved;
+               free_nominal = pmp->free_nominal;
+       }
+       if (cred && cred->cr_uid != 0) {
+               if ((int64_t)(free_nominal - bytes) <
+                   (int64_t)free_reserved) {
+                       return 2;
+               }
+       } else {
+               if ((int64_t)(free_nominal - bytes) <
+                   (int64_t)free_reserved / 2) {
+                       return 2;
+               }
+       }
+       if ((int64_t)(free_nominal - bytes) < (int64_t)free_reserved * 2)
+               return 1;
+       return 0;
+}
+
 /*
  * Debugging
  */
index ba6d1fb..23770f2 100644 (file)
@@ -345,7 +345,9 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
        ip = VTOI(vp);
 
        if (ip->pmp->ronly)
-               return(EROFS);
+               return (EROFS);
+       if (hammer2_vfs_enospace(ip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(ip->pmp, 0);
@@ -585,8 +587,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                int dtype;
 
                error = hammer2_xop_collect(&xop->head, 0);
-               if (error)
+               error = hammer2_error_to_errno(error);
+               if (error) {
                        break;
+               }
                if (cookie_index == ncookies)
                        break;
                if (hammer2_debug & 0x0020)
@@ -729,6 +733,7 @@ hammer2_vop_write(struct vop_write_args *ap)
        struct uio *uio;
        int error;
        int seqcount;
+       int ioflag;
 
        /*
         * Read operations supported on this vnode?
@@ -741,13 +746,22 @@ hammer2_vop_write(struct vop_write_args *ap)
         * Misc
         */
        ip = VTOI(vp);
+       ioflag = ap->a_ioflag;
        uio = ap->a_uio;
        error = 0;
-       if (ip->pmp->ronly) {
+       if (ip->pmp->ronly)
                return (EROFS);
+       switch (hammer2_vfs_enospace(ip, uio->uio_resid, ap->a_cred)) {
+       case 2:
+               return (ENOSPC);
+       case 1:
+               ioflag |= IO_DIRECT;    /* semi-synchronous */
+               /* fall through */
+       default:
+               break;
        }
 
-       seqcount = ap->a_ioflag >> 16;
+       seqcount = ioflag >> 16;
 
        /*
         * Check resource limit
@@ -771,7 +785,7 @@ hammer2_vop_write(struct vop_write_args *ap)
                hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE);
        else
                hammer2_trans_init(ip->pmp, 0);
-       error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
+       error = hammer2_write_file(ip, uio, ioflag, seqcount);
        hammer2_trans_done(ip->pmp);
 
        return (error);
@@ -1191,6 +1205,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
        hammer2_xop_start(&xop->head, hammer2_xop_nresolve);
 
        error = hammer2_xop_collect(&xop->head, 0);
+       error = hammer2_error_to_errno(error);
        if (error) {
                ip = NULL;
        } else {
@@ -1215,7 +1230,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
         *          will handle it properly.
         */
        if (ip) {
-               vp = hammer2_igetv(ip, &error);
+               vp = hammer2_igetv(ip, &error); /* error set to UNIX error */
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
@@ -1279,6 +1294,8 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
                return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1298,13 +1315,19 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
                                   NULL, 0, inum,
                                   inum, 0, 0,
                                   0, &error);
-       if (error == 0) {
+       if (error) {
+               error = hammer2_error_to_errno(error);
+       } else {
                error = hammer2_dirent_create(dip, name, name_len,
                                              nip->meta.inum, nip->meta.type);
+               /* returns UNIX error code */
        }
-
        if (error) {
-               KKASSERT(nip == NULL);
+               if (nip) {
+                       hammer2_inode_unlink_finisher(nip, 0);
+                       hammer2_inode_unlock(nip);
+                       nip = NULL;
+               }
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
@@ -1384,6 +1407,8 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
        tdip = VTOI(ap->a_dvp);
        if (tdip->pmp->ronly)
                return (EROFS);
+       if (hammer2_vfs_enospace(tdip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1473,6 +1498,8 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
                return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1497,7 +1524,11 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
                                              nip->meta.inum, nip->meta.type);
        }
        if (error) {
-               KKASSERT(nip == NULL);
+               if (nip) {
+                       hammer2_inode_unlink_finisher(nip, 0);
+                       hammer2_inode_unlock(nip);
+                       nip = NULL;
+               }
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
@@ -1545,6 +1576,8 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
                return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1564,10 +1597,12 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
                error = hammer2_dirent_create(dip, name, name_len,
                                              nip->meta.inum, nip->meta.type);
        }
-
-
        if (error) {
-               KKASSERT(nip == NULL);
+               if (nip) {
+                       hammer2_inode_unlink_finisher(nip, 0);
+                       hammer2_inode_unlock(nip);
+                       nip = NULL;
+               }
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
@@ -1615,6 +1650,8 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
                return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1638,10 +1675,12 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                error = hammer2_dirent_create(dip, name, name_len,
                                              nip->meta.inum, nip->meta.type);
        }
-
-
        if (error) {
-               KKASSERT(nip == NULL);
+               if (nip) {
+                       hammer2_inode_unlink_finisher(nip, 0);
+                       hammer2_inode_unlock(nip);
+                       nip = NULL;
+               }
                *ap->a_vpp = NULL;
                hammer2_trans_done(dip->pmp);
                return error;
@@ -1718,7 +1757,9 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
 
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
-               return(EROFS);
+               return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        ncp = ap->a_nch->ncp;
 
@@ -1753,6 +1794,7 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
         * (else it has already been removed).
         */
        error = hammer2_xop_collect(&xop->head, 0);
+       error = hammer2_error_to_errno(error);
        hammer2_inode_unlock(dip);
 
        if (error == 0) {
@@ -1804,7 +1846,9 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
 
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly)
-               return(EROFS);
+               return (EROFS);
+       if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(dip->pmp, 0);
@@ -1825,6 +1869,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
         * (else it has already been removed).
         */
        error = hammer2_xop_collect(&xop->head, 0);
+       error = hammer2_error_to_errno(error);
        hammer2_inode_unlock(dip);
 
        if (error == 0) {
@@ -1891,7 +1936,9 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        tdip = VTOI(ap->a_tdvp);        /* target directory */
 
        if (fdip->pmp->ronly)
-               return(EROFS);
+               return (EROFS);
+       if (hammer2_vfs_enospace(fdip, 0, ap->a_cred) > 1)
+               return (ENOSPC);
 
        fncp = ap->a_fnch->ncp;         /* entry name in source */
        fname = fncp->nc_name;
@@ -1976,6 +2023,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
                 * (else it has already been removed).
                 */
                tnch_error = hammer2_xop_collect(&xop2->head, 0);
+               tnch_error = hammer2_error_to_errno(tnch_error);
                /* hammer2_inode_unlock(tdip); */
 
                if (tnch_error == 0) {
@@ -2019,6 +2067,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
                                break;
                        ++tlhc;
                }
+               error = hammer2_error_to_errno(error);
                hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
 
                if (error) {
@@ -2054,6 +2103,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
                hammer2_xop_start(&xop4->head, hammer2_xop_nrename);
 
                error = hammer2_xop_collect(&xop4->head, 0);
+               error = hammer2_error_to_errno(error);
                hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);
 
                if (error == ENOENT)
@@ -2288,7 +2338,7 @@ hammer2_vop_markatime(struct vop_markatime_args *ap)
        ip = VTOI(vp);
 
        if (ip->pmp->ronly)
-               return(EROFS);
+               return (EROFS);
        return(0);
 }
 
index cb5fc96..e87ce00 100644 (file)
@@ -62,8 +62,6 @@
 
 /*
  * Determine if the specified directory is empty.  Returns 0 on success.
- *
- * May return 0, ENOTDIR, or EAGAIN.
  */
 static
 int
@@ -86,7 +84,6 @@ checkdirempty(hammer2_chain_t *oparent, hammer2_chain_t *ochain, int clindex)
                error = hammer2_chain_inode_find(chain->pmp, inum,
                                                 clindex, 0,
                                                 &parent, &chain);
-               error = hammer2_error_to_errno(error);
                if (parent) {
                        hammer2_chain_unlock(parent);
                        hammer2_chain_drop(parent);
@@ -98,7 +95,7 @@ checkdirempty(hammer2_chain_t *oparent, hammer2_chain_t *ochain, int clindex)
                                        hammer2_chain_unlock(chain);
                                        hammer2_chain_drop(chain);
                                }
-                               return EAGAIN;
+                               return HAMMER2_ERROR_EAGAIN;
                        }
                }
        }
@@ -116,11 +113,9 @@ checkdirempty(hammer2_chain_t *oparent, hammer2_chain_t *ochain, int clindex)
                                             &error, 0);
        }
        if (chain) {
-               error = ENOTEMPTY;
+               error = HAMMER2_ERROR_ENOTEMPTY;
                hammer2_chain_unlock(chain);
                hammer2_chain_drop(chain);
-       } else {
-               error = hammer2_error_to_errno(error); /* may be 0 */
        }
        hammer2_chain_lookup_done(parent);
 
@@ -146,7 +141,7 @@ hammer2_xop_ipcluster(hammer2_thread_t *thr, hammer2_xop_t *arg)
        if (chain)
                error = chain->error;
        else
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                
        hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
        if (chain) {
@@ -203,7 +198,6 @@ hammer2_xop_readdir(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                           key_next, HAMMER2_KEY_MAX,
                                           &error, HAMMER2_LOOKUP_SHARED);
        }
-       error = hammer2_error_to_errno(error);
 break2:
        if (chain) {
                hammer2_chain_unlock(chain);
@@ -236,7 +230,7 @@ hammer2_xop_nresolve(hammer2_thread_t *thr, hammer2_xop_t *arg)
        if (parent == NULL) {
                kprintf("xop_nresolve: NULL parent\n");
                chain = NULL;
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
        name = xop->head.name1;
@@ -261,7 +255,6 @@ hammer2_xop_nresolve(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                           HAMMER2_LOOKUP_ALWAYS |
                                           HAMMER2_LOOKUP_SHARED);
        }
-       error = hammer2_error_to_errno(error);
 
        /*
         * If the entry is a hardlink pointer, resolve it.
@@ -279,7 +272,6 @@ hammer2_xop_nresolve(hammer2_thread_t *thr, hammer2_xop_t *arg)
        } else if (chain && error == 0) {
                error = chain->error;
        }
-       error = hammer2_error_to_errno(error);
 done:
        error = hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
        if (chain) {
@@ -326,7 +318,7 @@ again:
        chain = NULL;
        if (parent == NULL) {
                kprintf("xop_nresolve: NULL parent\n");
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
        name = xop->head.name1;
@@ -390,17 +382,17 @@ again:
                if (type == HAMMER2_OBJTYPE_DIRECTORY && doforce) {
                        /*
                         * If doforce then execute the operation even if
-                        * the directory is not empty.
+                        * the directory is not empty or errored.
                         */
-                       error = hammer2_error_to_errno(chain->error);
-                       hammer2_chain_delete(parent, chain,
+                       /* ignore chain->error */
+                       error = hammer2_chain_delete(parent, chain,
                                             xop->head.mtid, dopermanent);
                } else if (type == HAMMER2_OBJTYPE_DIRECTORY &&
                           (error = checkdirempty(parent, chain, thr->clindex)) != 0) {
                        /*
                         * error may be EAGAIN or ENOTEMPTY
                         */
-                       if (error == EAGAIN) {
+                       if (error == HAMMER2_ERROR_EAGAIN) {
                                hammer2_chain_unlock(chain);
                                hammer2_chain_drop(chain);
                                hammer2_chain_unlock(parent);
@@ -409,23 +401,22 @@ again:
                        }
                } else if (type == HAMMER2_OBJTYPE_DIRECTORY &&
                    xop->isdir == 0) {
-                       error = ENOTDIR;
+                       error = HAMMER2_ERROR_ENOTDIR;
                } else if (type != HAMMER2_OBJTYPE_DIRECTORY &&
                           xop->isdir >= 1) {
-                       error = EISDIR;
+                       error = HAMMER2_ERROR_EISDIR;
                } else {
                        /*
                         * Delete the directory entry.  chain might also
                         * be a directly-embedded inode.
                         */
-                       error = hammer2_error_to_errno(chain->error);
+                       error = chain->error;
                        hammer2_chain_delete(parent, chain,
                                             xop->head.mtid, dopermanent);
                }
        } else {
                if (chain && error == 0)
                        error = chain->error;
-               error = hammer2_error_to_errno(error);
        }
 
        /*
@@ -442,7 +433,6 @@ again:
                error2 = hammer2_chain_inode_find(chain->pmp, lhc,
                                                  thr->clindex, 0,
                                                  &parent, &chain);
-               error2 = hammer2_error_to_errno(error2);
                if (error2) {
                        kprintf("inode_find: %016jx %p failed\n",
                                lhc, chain);
@@ -510,13 +500,13 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                chain = hammer2_inode_chain(ip, thr->clindex,
                                            HAMMER2_RESOLVE_ALWAYS);
                if (chain == NULL) {
-                       error = EIO;
+                       error = HAMMER2_ERROR_EIO;
                        parent = NULL;
                        goto done;
                }
                parent = hammer2_chain_getparent(chain, HAMMER2_RESOLVE_ALWAYS);
                if (parent == NULL) {
-                       error = EIO;
+                       error = HAMMER2_ERROR_EIO;
                        goto done;
                }
        } else {
@@ -532,7 +522,7 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                             HAMMER2_RESOLVE_ALWAYS);
                if (parent == NULL) {
                        kprintf("xop_nrename: NULL parent\n");
-                       error = EIO;
+                       error = HAMMER2_ERROR_EIO;
                        goto done;
                }
                name = xop->head.name1;
@@ -555,7 +545,6 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                                   HAMMER2_LOOKUP_ALWAYS);
                }
        }
-       error = hammer2_error_to_errno(error);
 
        if (chain == NULL) {
                /* XXX shouldn't happen, but does under fsstress */
@@ -563,12 +552,12 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                        xop->head.name1,
                        xop->head.name2);
                if (error == 0)
-                       error = ENOENT;
+                       error = HAMMER2_ERROR_ENOENT;
                goto done;
        }
 
        if (chain->error) {
-               error = hammer2_error_to_errno(chain->error);
+               error = chain->error;
                goto done;
        }
 
@@ -594,29 +583,41 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        hammer2_inode_data_t *wipdata;
 
-                       hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
-                       wipdata = &chain->data->ipdata;
+                       error = hammer2_chain_modify(chain, xop->head.mtid,
+                                                    0, 0);
+                       if (error == 0) {
+                               wipdata = &chain->data->ipdata;
 
-                       bzero(wipdata->filename, sizeof(wipdata->filename));
-                       bcopy(xop->head.name2, wipdata->filename,
-                             xop->head.name2_len);
-                       wipdata->meta.name_key = xop->lhc;
-                       wipdata->meta.name_len = xop->head.name2_len;
+                               bzero(wipdata->filename,
+                                     sizeof(wipdata->filename));
+                               bcopy(xop->head.name2,
+                                     wipdata->filename,
+                                     xop->head.name2_len);
+                               wipdata->meta.name_key = xop->lhc;
+                               wipdata->meta.name_len = xop->head.name2_len;
+                       }
                }
                if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
-                       if (xop->head.name2_len <= sizeof(chain->bref.check.buf)) {
+                       if (xop->head.name2_len <=
+                           sizeof(chain->bref.check.buf)) {
                                /*
                                 * Remove any related data buffer, we can
                                 * embed the filename in the bref itself.
                                 */
-                               hammer2_chain_resize(chain, xop->head.mtid, 0,
-                                                    0, 0);
-                               hammer2_chain_modify(chain, xop->head.mtid,
-                                                    0, 0);
-                               bzero(chain->bref.check.buf,
-                                     sizeof(chain->bref.check.buf));
-                               bcopy(xop->head.name2, chain->bref.check.buf,
-                                     xop->head.name2_len);
+                               error = hammer2_chain_resize(
+                                               chain, xop->head.mtid, 0, 0, 0);
+                               if (error == 0) {
+                                       error = hammer2_chain_modify(
+                                                       chain, xop->head.mtid,
+                                                       0, 0);
+                               }
+                               if (error == 0) {
+                                       bzero(chain->bref.check.buf,
+                                             sizeof(chain->bref.check.buf));
+                                       bcopy(xop->head.name2,
+                                             chain->bref.check.buf,
+                                             xop->head.name2_len);
+                               }
                        } else {
                                /*
                                 * Associate a data buffer with the bref.
@@ -624,16 +625,25 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                 * data buffer is not 64KB so use chain->bytes
                                 * instead of sizeof().
                                 */
-                               hammer2_chain_resize(chain, xop->head.mtid, 0,
-                                    hammer2_getradix(HAMMER2_ALLOC_MIN), 0);
-                               hammer2_chain_modify(chain, xop->head.mtid,
-                                                    0, 0);
-                               bzero(chain->data->buf, chain->bytes);
-                               bcopy(xop->head.name2,
-                                     chain->data->buf,
-                                     xop->head.name2_len);
+                               error = hammer2_chain_resize(
+                                       chain, xop->head.mtid, 0,
+                                       hammer2_getradix(HAMMER2_ALLOC_MIN), 0);
+                               if (error == 0) {
+                                       error = hammer2_chain_modify(
+                                                   chain, xop->head.mtid,
+                                                   0, 0);
+                               }
+                               if (error == 0) {
+                                       bzero(chain->data->buf, chain->bytes);
+                                       bcopy(xop->head.name2,
+                                             chain->data->buf,
+                                             xop->head.name2_len);
+                               }
+                       }
+                       if (error == 0) {
+                               chain->bref.embed.dirent.namlen =
+                                       xop->head.name2_len;
                        }
-                       chain->bref.embed.dirent.namlen = xop->head.name2_len;
                }
        }
 
@@ -646,10 +656,11 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
            chain->data->ipdata.meta.iparent != xop->head.ip3->meta.inum) {
                hammer2_inode_data_t *wipdata;
 
-               hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
-               wipdata = &chain->data->ipdata;
-
-               wipdata->meta.iparent = xop->head.ip3->meta.inum;
+               error = hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
+               if (error == 0) {
+                       wipdata = &chain->data->ipdata;
+                       wipdata->meta.iparent = xop->head.ip3->meta.inum;
+               }
        }
 
        /*
@@ -660,28 +671,30 @@ hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
        parent = hammer2_inode_chain(xop->head.ip3, thr->clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
 
-       tmp = hammer2_chain_lookup(&parent, &key_next,
-                                  xop->lhc & ~HAMMER2_DIRHASH_LOMASK,
-                                  xop->lhc | HAMMER2_DIRHASH_LOMASK,
-                                  &error,
-                                  HAMMER2_LOOKUP_ALWAYS);
-       while (tmp) {
-               if (hammer2_chain_dirent_test(tmp, xop->head.name2,
-                                             xop->head.name2_len)) {
-                       hammer2_chain_delete(parent, tmp, xop->head.mtid, 0);
+       if (error == 0) {
+               tmp = hammer2_chain_lookup(&parent, &key_next,
+                                          xop->lhc & ~HAMMER2_DIRHASH_LOMASK,
+                                          xop->lhc | HAMMER2_DIRHASH_LOMASK,
+                                          &error,
+                                          HAMMER2_LOOKUP_ALWAYS);
+               while (tmp) {
+                       if (hammer2_chain_dirent_test(tmp, xop->head.name2,
+                                                     xop->head.name2_len)) {
+                               hammer2_chain_delete(parent, tmp,
+                                                    xop->head.mtid, 0);
+                       }
+                       tmp = hammer2_chain_next(&parent, tmp, &key_next,
+                                                key_next,
+                                                xop->lhc |
+                                                 HAMMER2_DIRHASH_LOMASK,
+                                                &error,
+                                                HAMMER2_LOOKUP_ALWAYS);
                }
-               tmp = hammer2_chain_next(&parent, tmp, &key_next,
-                                        key_next,
-                                        xop->lhc | HAMMER2_DIRHASH_LOMASK,
-                                        &error,
-                                        HAMMER2_LOOKUP_ALWAYS);
        }
-       error = hammer2_error_to_errno(error);
-
        if (error == 0) {
                /*
                 * A relookup is required before the create to properly
@@ -730,7 +743,7 @@ hammer2_xop_scanlhc(hammer2_thread_t *thr, hammer2_xop_t *arg)
        if (parent == NULL) {
                kprintf("xop_nresolve: NULL parent\n");
                chain = NULL;
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
 
@@ -759,7 +772,6 @@ hammer2_xop_scanlhc(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                           HAMMER2_LOOKUP_ALWAYS |
                                           HAMMER2_LOOKUP_SHARED);
        }
-       error = hammer2_error_to_errno(error);
 done:
        hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);
        if (parent) {
@@ -787,7 +799,7 @@ hammer2_xop_lookup(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                     HAMMER2_RESOLVE_SHARED);
        chain = NULL;
        if (parent == NULL) {
-               error = EIO;
+               error = HAMMER2_ERROR_EIO;
                goto done;
        }
 
@@ -800,12 +812,11 @@ hammer2_xop_lookup(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                     &error,
                                     HAMMER2_LOOKUP_ALWAYS |
                                     HAMMER2_LOOKUP_SHARED);
-       error = hammer2_error_to_errno(error);
        if (error == 0) {
                if (chain)
-                       error = hammer2_error_to_errno(chain->error);
+                       error = chain->error;
                else
-                       error = ENOENT;
+                       error = HAMMER2_ERROR_ENOENT;
        }
        hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
 
@@ -869,7 +880,6 @@ hammer2_xop_scanall(hammer2_thread_t *thr, hammer2_xop_t *arg)
                                           key_next, xop->key_end,
                                           &error, xop->lookup_flags);
        }
-       error = hammer2_error_to_errno(error);
 break2:
        if (chain) {
                hammer2_chain_unlock(chain);