hammer2 - stabilization, sequencing
author     Matthew Dillon <dillon@apollo.backplane.com>
           Fri, 28 Aug 2015 06:15:17 +0000 (23:15 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Fri, 28 Aug 2015 06:26:46 +0000 (23:26 -0700)
* Change the way XOPs are dispatched.  Instead of dispatching an XOP
  to specific service threads in the xgrp, we now queue the XOP to a
  per-cluster-node xopq and allow the service threads to compete for
  work.
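
  A rough user-space analogue of the new dispatch pattern is sketched
  below, using a pthread mutex/condvar in place of the kernel's xop_spin
  spinlock and tsleep/wakeup_one machinery; node_queue, xop_stub,
  xop_dispatch and xop_worker are illustrative names, not the kernel's:

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <sys/queue.h>

  struct xop_stub {
          TAILQ_ENTRY(xop_stub) entry;
          int                   id;
  };
  TAILQ_HEAD(xop_list, xop_stub);

  /* One of these per cluster node; all of that node's workers share it. */
  struct node_queue {
          pthread_mutex_t lock;
          pthread_cond_t  cv;
          struct xop_list q;
  };

  /* Frontend: queue the op on the node's queue and wake one idle worker. */
  static void
  xop_dispatch(struct node_queue *nq, int id)
  {
          struct xop_stub *xop = calloc(1, sizeof(*xop));

          xop->id = id;
          pthread_mutex_lock(&nq->lock);
          TAILQ_INSERT_TAIL(&nq->q, xop, entry);
          pthread_cond_signal(&nq->cv);   /* analogue of wakeup_one() */
          pthread_mutex_unlock(&nq->lock);
  }

  /* Backend: every service thread for the node competes for queued work. */
  static void *
  xop_worker(void *arg)
  {
          struct node_queue *nq = arg;
          struct xop_stub *xop;

          for (;;) {
                  pthread_mutex_lock(&nq->lock);
                  while ((xop = TAILQ_FIRST(&nq->q)) == NULL)
                          pthread_cond_wait(&nq->cv, &nq->lock);
                  TAILQ_REMOVE(&nq->q, xop, entry);
                  pthread_mutex_unlock(&nq->lock);
                  printf("ran xop %d\n", xop->id);
                  free(xop);
          }
          return NULL;
  }

  int
  main(void)
  {
          struct node_queue nq = {
                  PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                  TAILQ_HEAD_INITIALIZER(nq.q)
          };
          pthread_t thr[2];
          int i;

          for (i = 0; i < 2; ++i)
                  pthread_create(&thr[i], NULL, xop_worker, &nq);
          for (i = 0; i < 4; ++i)
                  xop_dispatch(&nq, i);
          sleep(1);               /* let the workers drain the queue */
          return 0;
  }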

* Implement XOP dependency tracking.  Cluster nodes can complete execution
  of an XOP in any order, and the frontend may complete early once a quorum
  is reached while the XOP is still executing on other cluster nodes.  In
  this situation the inode lock may be released or cycled and another,
  dependent XOP may be issued, potentially executing out of order on the
  cluster nodes that have not yet finished (or possibly even started) the
  previous XOP.

  With the new queueing mechanism we also implement dependency tracking
  using the xop->ip1, ip2, and ip3 fields.  Dependent XOPs on (slow) cluster
  nodes will be held back and the ordering of dependent XOPs will be
  enforced.

  This fixes one of several cluster-related issues that must be addressed
  before the frontend can be freed from waiting on slow backend cluster
  nodes.
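
  The hold-back rule is sketched below in plain C, condensed from the new
  hammer2_xop_next() scan in hammer2_thread.c; dep_test, dep_set, pick_next
  and struct qop are simplified stand-ins for the kernel's xop_testhash(),
  xop_sethash() and hammer2_xop_head:

  #include <stddef.h>
  #include <stdint.h>

  #define DEP_HASH_SIZE   16
  #define DEP_HASH_MASK   (DEP_HASH_SIZE - 1)

  /* One queued op; ip1 is always set, ip2/ip3 are optional. */
  struct qop {
          struct qop      *next;
          const void      *ip1, *ip2, *ip3;
          int             running;
  };

  static int
  dep_test(const void *ip, const uint32_t *hash)
  {
          uintptr_t hv = (uintptr_t)ip / sizeof(uintptr_t);

          return ((hash[(hv >> 5) & DEP_HASH_MASK] & (1U << (hv & 31))) != 0);
  }

  static void
  dep_set(const void *ip, uint32_t *hash)
  {
          uintptr_t hv = (uintptr_t)ip / sizeof(uintptr_t);

          hash[(hv >> 5) & DEP_HASH_MASK] |= 1U << (hv & 31);
  }

  /*
   * Walk the queue in arrival order.  An op whose inodes collide with an
   * earlier, still-queued op is skipped (held back); the first op with no
   * collisions that is not already running is returned and left on the
   * queue.  False positives in the small hash merely hold an op back until
   * a later scan.
   */
  static struct qop *
  pick_next(struct qop *head)
  {
          uint32_t hash[DEP_HASH_SIZE] = { 0 };
          struct qop *op;

          for (op = head; op != NULL; op = op->next) {
                  if (dep_test(op->ip1, hash) ||
                      (op->ip2 && dep_test(op->ip2, hash)) ||
                      (op->ip3 && dep_test(op->ip3, hash)))
                          continue;
                  dep_set(op->ip1, hash);
                  if (op->ip2)
                          dep_set(op->ip2, hash);
                  if (op->ip3)
                          dep_set(op->ip3, hash);
                  if (op->running)
                          continue;     /* busy, but still blocks dependents */
                  return op;
          }
          return NULL;
  }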

* The strategy XOP now uses a per-XOP lock instead of an xgrp lock
  (the xgrp is no longer stored in the xop structure, and an XOP's work
  may now be picked up by threads from different xgrps on each cluster
  node).
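
  A minimal sketch of the per-XOP completion pattern, with a pthread mutex
  standing in for hammer2_mtx_t; strategy_op and strategy_complete_once are
  illustrative names, and the real strategy code additionally handles the
  EINPROGRESS/collect states shown in the hammer2_strategy.c hunks below:

  #include <pthread.h>

  /* Per-op state; 'lock' replaces the old shared xgrp->mtx2. */
  struct strategy_op {
          pthread_mutex_t lock;
          int             finished;
  };

  /*
   * Called by each backend thread as its result becomes collectable.
   * Only the first caller claims the op and runs the one-time completion
   * work; later callers, and callers racing an in-progress claim, back off.
   */
  static int
  strategy_complete_once(struct strategy_op *op)
  {
          if (op->finished)                       /* unlocked pre-check */
                  return (0);
          pthread_mutex_lock(&op->lock);
          if (op->finished) {                     /* lost the race */
                  pthread_mutex_unlock(&op->lock);
                  return (0);
          }
          op->finished = 1;
          pthread_mutex_unlock(&op->lock);
          /* ...biodone()/completion work runs exactly once here... */
          return (1);
  }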

* Normalize some structure field names.

* bulkfree now takes a snapshot of vchain, using a separate (temporary)
  chain structure which isolates the entire topology scan from the
  live filesystem.

  This required minor adjustments to base/count handling for
  HAMMER2_BREF_TYPE_VOLUME and HAMMER2_BREF_TYPE_FREEMAP.
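
  A condensed usage sketch of the new snapshot helpers, boiled down from
  the hammer2_bulkfree_pass() changes below (scan-window looping, freemap
  synchronization and statistics are elided; bulkfree_pass_sketch is an
  illustrative name):

  static int
  bulkfree_pass_sketch(hammer2_dev_t *hmp)
  {
          hammer2_bulkfree_info_t cbinfo;
          hammer2_chain_t *vchain;
          int doabort = 0;

          bzero(&cbinfo, sizeof(cbinfo));
          /* ...scan-window and temporary-memory setup elided... */

          /* Serialize passes; a flush must separate consecutive passes. */
          lockmgr(&hmp->bulklk, LK_EXCLUSIVE);

          /* Scan a snapshot of the volume root, not the live vchain. */
          vchain = hammer2_chain_bulksnap(&hmp->vchain);
          doabort |= hammer2_bulk_scan(vchain, h2_bulkfree_callback, &cbinfo);
          hammer2_chain_bulkdrop(vchain);

          lockmgr(&hmp->bulklk, LK_RELEASE);
          return (doabort);
  }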

* Debugging adjustments.

13 files changed:
sys/vfs/hammer2/TODO
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_bulkscan.c
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_disk.h
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_strategy.c
sys/vfs/hammer2/hammer2_thread.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c
sys/vfs/hammer2/hammer2_xops.c

diff --git a/sys/vfs/hammer2/TODO b/sys/vfs/hammer2/TODO
index b28f708..0438c22 100644
@@ -1,3 +1,7 @@
+
+* bulkfree pass needs to do a vchain flush from the root to avoid
+  accidentally freeing live in-process chains.
+
 * Need backend synchronization / serialization when the frontend detaches
   a XOP.  modify_tid tests won't be enough, the backend may wind up executing
   the XOP out of order after the detach.
diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h
index a689bb5..74c17e2 100644
@@ -784,13 +784,13 @@ typedef struct hammer2_trans hammer2_trans_t;
  */
 struct hammer2_thread {
        struct hammer2_pfs *pmp;
+       hammer2_xop_list_t *xopq;       /* points into pmp->xopq[] */
        thread_t        td;
        uint32_t        flags;
        int             depth;
        int             clindex;        /* cluster element index */
        int             repidx;
        struct lock     lk;             /* thread control lock */
-       hammer2_xop_list_t xopq;
 };
 
 typedef struct hammer2_thread hammer2_thread_t;
@@ -811,6 +811,8 @@ typedef struct hammer2_thread hammer2_thread_t;
  * nodes.  It provides a rendezvous for concurrent node execution and
  * can be detached from the frontend operation to allow the frontend to
  * return early.
+ *
+ * This structure also sequences operations on up to three inodes.
  */
 typedef void (*hammer2_xop_func_t)(union hammer2_xop *xop, int clidx);
 
@@ -820,24 +822,25 @@ typedef struct hammer2_xop_fifo {
        int                     errors[HAMMER2_XOPFIFO];
        int                     ri;
        int                     wi;
-       int                     unused03;
+       int                     flags;
 } hammer2_xop_fifo_t;
 
+#define HAMMER2_XOP_FIFO_RUN   0x0001
+
 struct hammer2_xop_head {
        hammer2_xop_func_t      func;
        hammer2_tid_t           mtid;
-       struct hammer2_inode    *ip;
+       struct hammer2_inode    *ip1;
        struct hammer2_inode    *ip2;
        struct hammer2_inode    *ip3;
-       struct hammer2_xop_group *xgrp;
        uint32_t                check_counter;
        uint32_t                run_mask;
        uint32_t                chk_mask;
        int                     state;
        int                     error;
        hammer2_key_t           collect_key;
-       char                    *name;
-       size_t                  name_len;
+       char                    *name1;
+       size_t                  name1_len;
        char                    *name2;
        size_t                  name2_len;
        hammer2_xop_fifo_t      collect[HAMMER2_MAXCLUSTER];
@@ -854,6 +857,7 @@ struct hammer2_xop_strategy {
        hammer2_xop_head_t      head;
        hammer2_key_t           lbase;
        int                     finished;
+       hammer2_mtx_t           lock;
        struct bio              *bio;
 };
 
@@ -977,8 +981,6 @@ typedef union hammer2_xop hammer2_xop_t;
  */
 struct hammer2_xop_group {
        hammer2_thread_t        thrs[HAMMER2_MAXCLUSTER];
-       hammer2_mtx_t           mtx;
-       hammer2_mtx_t           mtx2;
 };
 
 typedef struct hammer2_xop_group hammer2_xop_group_t;
@@ -1025,6 +1027,7 @@ struct hammer2_dev {
        struct spinlock list_spin;
        struct h2_flush_list    flushq; /* flush seeds */
        struct hammer2_pfs *spmp;       /* super-root pmp for transactions */
+       struct lock     bulklk;         /* bulkfree lock */
        struct lock     vollk;          /* lockmgr lock */
        hammer2_off_t   heur_freemap[HAMMER2_FREEMAP_HEUR];
        int             volhdrno;       /* last volhdrno written */
@@ -1121,7 +1124,9 @@ struct hammer2_pfs {
        hammer2_thread_t        sync_thrs[HAMMER2_MAXCLUSTER];
        uint32_t                cluster_flags;  /* cached cluster flags */
        int                     has_xop_threads;
+       struct spinlock         xop_spin;       /* xop sequencer */
        hammer2_xop_group_t     xop_groups[HAMMER2_XOPGROUPS];
+       hammer2_xop_list_t      xopq[HAMMER2_MAXCLUSTER];
 };
 
 typedef struct hammer2_pfs hammer2_pfs_t;
@@ -1382,6 +1387,8 @@ void hammer2_delayed_flush(hammer2_chain_t *chain);
 void hammer2_chain_setflush(hammer2_chain_t *chain);
 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
                                hammer2_blockref_t *base, int count);
+hammer2_chain_t *hammer2_chain_bulksnap(hammer2_chain_t *chain);
+void hammer2_chain_bulkdrop(hammer2_chain_t *copy);
 
 void hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata);
 int hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata);
diff --git a/sys/vfs/hammer2/hammer2_bulkscan.c b/sys/vfs/hammer2/hammer2_bulkscan.c
index 15f2707..e461c1e 100644
@@ -162,23 +162,27 @@ hammer2_bulk_scan(hammer2_chain_t *parent,
 }
 
 /*
- * Bulkfree algorithm -
+ * Bulkfree algorithm
  *
- * DoTwice {
- *     flush sync
- *     Scan the whole topology and build the freemap
- *     ** -> 11 during scan for all elements scanned (and thus not free)
- *     11 -> 10 after scan if allocated in-topo and free in-memory, mark 10
- *     10 -> 00 after scan if possibly-free in-topo and free in-memory mark 00
+ * Repeat {
+ *     Chain flush (partial synchronization)
+ *     Scan the whole topology - build in-memory freemap (mark 11)
+ *     Reconcile the in-memory freemap against the on-disk freemap.
+ *             ondisk xx -> ondisk 11 (if allocated)
+ *             ondisk 11 -> ondisk 10 (if free in-memory)
+ *             ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
  * }
  *
- * Adjustment of the freemap ->10 and ->00 cannot occur until the topology
- * scan is complete.  The scan runs concurrentlyt with normal filesystem
- * operations and any allocation will also remark the freemap bitmap 11.
- * We handle races by performing two scans and only changing the map to
- * fully free (00) if both passes believe it is free.
+ * The topology scan may have to be performed multiple times to window
+ * freemaps which are too large to fit in kernel memory.
  *
- * Temporary memory in multiples of 64KB is required to reconstruct leaf
+ * Races are handled using a double-transition (11->10, 10->00).  The bulkfree
+ * scan snapshots the volume root's blockset and thus can run concurrent with
+ * normal operations, as long as a full flush is made between each pass to
+ * synchronize any modified chains (otherwise their blocks might be improperly
+ * freed).
+ *
+ * Temporary memory in multiples of 64KB is required to reconstruct the leaf
  * hammer2_bmap_data blocks so they can later be compared against the live
  * freemap.  Each 64KB block represents 128 x 16KB x 1024 = ~2 GB of storage.
  * A 32MB save area thus represents around ~1 TB.  The temporary memory
@@ -214,12 +218,27 @@ int
 hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
 {
        hammer2_bulkfree_info_t cbinfo;
+       hammer2_chain_t *vchain;
        hammer2_off_t incr;
        size_t size;
        int doabort = 0;
 
+       /*
+        * A bulkfree operations lock is required for the duration.  We
+        * must hold it across our flushes to guarantee that we never run
+        * two bulkfree passes in a row without a flush in the middle.
+        */
+       lockmgr(&hmp->bulklk, LK_EXCLUSIVE);
+
+       /*
+        * Flush-a-roonie.  A full filesystem flush is not needed
+        */
+
        /* hammer2_vfs_sync(hmp->mp, MNT_WAIT); XXX */
 
+       /*
+        * Setup for free pass
+        */
        bzero(&cbinfo, sizeof(cbinfo));
        size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
               ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
@@ -235,6 +254,16 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
                cbinfo.sbase = hmp->voldata.volu_size;
        cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
 
+       /*
+        * The primary storage scan must use a snapshot of the volume
+        * root to avoid racing renames and other frontend work.
+        *
+        * Note that snapshots only snap synchronized storage, so
+        * we have to flush between each pass or we risk freeing
+        * storage allocated by the frontend.
+        */
+       vchain = hammer2_chain_bulksnap(&hmp->vchain);
+
        /*
         * Loop on a full meta-data scan as many times as required to
         * get through all available storage.
@@ -258,8 +287,11 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
 
                hammer2_trans_init(hmp->spmp, 0);
                cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
-               doabort |= hammer2_bulk_scan(&hmp->vchain,
-                                           h2_bulkfree_callback, &cbinfo);
+
+               doabort |= hammer2_bulk_scan(vchain, h2_bulkfree_callback,
+                                            &cbinfo);
+               kprintf("bulkfree lastdrop %d %d\n",
+                       vchain->refs, vchain->core.chain_count);
 
                /*
                 * If complete scan succeeded we can synchronize our
@@ -284,6 +316,7 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
                        break;
                cbinfo.sbase = cbinfo.sstop;
        }
+       hammer2_chain_bulkdrop(vchain);
        kmem_free_swapbacked(&cbinfo.kp);
 
        bfi->sstop = cbinfo.sbase;
@@ -302,6 +335,8 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
        kprintf("    ~2MB segs cleaned  %ld\n", cbinfo.count_l0cleans);
        kprintf("    linear adjusts     %ld\n", cbinfo.count_linadjusts);
 
+       lockmgr(&hmp->bulklk, LK_RELEASE);
+
        return doabort;
 }
 
diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c
index ede3535..65b8d3e 100644
@@ -190,8 +190,12 @@ hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
        case HAMMER2_BREF_TYPE_FREEMAP:
-               chain = NULL;
-               panic("hammer2_chain_alloc volume type illegal for op");
+               /*
+                * Only hammer2_chain_bulksnap() calls this function with these
+                * types.
+                */
+               chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
+               break;
        default:
                chain = NULL;
                panic("hammer2_chain_alloc: unrecognized blockref type: %d",
@@ -1729,11 +1733,11 @@ again:
                count = parent->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               base = &hmp->voldata.sroot_blockset.blockref[0];
+               base = &parent->data->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
-               base = &hmp->voldata.freemap_blockset.blockref[0];
+               base = &parent->data->blkset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
@@ -2055,6 +2059,10 @@ again:
        case HAMMER2_BREF_TYPE_INODE:
                /*
                 * An inode with embedded data has no sub-chains.
+                *
+                * WARNING! Bulk scan code may pass a static chain marked
+                *          as BREF_TYPE_INODE with a copy of the volume
+                *          root blockset to snapshot the volume.
                 */
                if (parent->data->ipdata.meta.op_flags &
                    HAMMER2_OPFLAG_DIRECTDATA) {
@@ -2079,11 +2087,11 @@ again:
                count = parent->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               base = &hmp->voldata.sroot_blockset.blockref[0];
+               base = &parent->data->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
-               base = &hmp->voldata.freemap_blockset.blockref[0];
+               base = &parent->data->blkset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
@@ -2359,12 +2367,12 @@ again:
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                KKASSERT(parent->data != NULL);
-               base = &hmp->voldata.sroot_blockset.blockref[0];
+               base = &parent->data->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
                KKASSERT(parent->data != NULL);
-               base = &hmp->voldata.freemap_blockset.blockref[0];
+               base = &parent->data->blkset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
@@ -2643,11 +2651,12 @@ _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
                        count = parent->bytes / sizeof(hammer2_blockref_t);
                        break;
                case HAMMER2_BREF_TYPE_VOLUME:
-                       base = &hmp->voldata.sroot_blockset.blockref[0];
+                       base = &parent->data->voldata.
+                                       sroot_blockset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_FREEMAP:
-                       base = &parent->data->npdata[0];
+                       base = &parent->data->blkset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                default:
@@ -2823,11 +2832,12 @@ hammer2_chain_create_indirect(hammer2_chain_t *parent,
                        count = parent->bytes / sizeof(hammer2_blockref_t);
                        break;
                case HAMMER2_BREF_TYPE_VOLUME:
-                       base = &hmp->voldata.sroot_blockset.blockref[0];
+                       base = &parent->data->voldata.
+                                       sroot_blockset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_FREEMAP:
-                       base = &hmp->voldata.freemap_blockset.blockref[0];
+                       base = &parent->data->blkset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                default:
@@ -3882,11 +3892,11 @@ hammer2_base_sort(hammer2_chain_t *chain)
                count = chain->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               base = &chain->hmp->voldata.sroot_blockset.blockref[0];
+               base = &chain->data->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
-               base = &chain->hmp->voldata.freemap_blockset.blockref[0];
+               base = &chain->data->blkset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
@@ -4127,6 +4137,55 @@ done:
        return (rchain ? EINVAL : 0);
 }
 
+/*
+ * Used by the bulkscan code to snapshot the synchronized storage for
+ * a volume, allowing it to be scanned concurrently against normal
+ * operation.
+ */
+hammer2_chain_t *
+hammer2_chain_bulksnap(hammer2_chain_t *chain)
+{
+       hammer2_chain_t *copy;
+
+       copy = hammer2_chain_alloc(chain->hmp, chain->pmp, &chain->bref);
+       switch(chain->bref.type) {
+       case HAMMER2_BREF_TYPE_VOLUME:
+               copy->data = kmalloc(sizeof(copy->data->voldata),
+                                    chain->hmp->mchain,
+                                    M_WAITOK | M_ZERO);
+               hammer2_spin_ex(&chain->core.spin);
+               copy->data->voldata = chain->data->voldata;
+               hammer2_spin_unex(&chain->core.spin);
+               break;
+       case HAMMER2_BREF_TYPE_FREEMAP:
+               copy->data = kmalloc(sizeof(hammer2_blockset_t),
+                                    chain->hmp->mchain,
+                                    M_WAITOK | M_ZERO);
+               hammer2_spin_ex(&chain->core.spin);
+               copy->data->blkset = chain->data->blkset;
+               hammer2_spin_unex(&chain->core.spin);
+               break;
+       default:
+               break;
+       }
+       return copy;
+}
+
+void
+hammer2_chain_bulkdrop(hammer2_chain_t *copy)
+{
+       switch(copy->bref.type) {
+       case HAMMER2_BREF_TYPE_VOLUME:
+       case HAMMER2_BREF_TYPE_FREEMAP:
+               KKASSERT(copy->data);
+               kfree(copy->data, copy->hmp->mchain);
+               copy->data = NULL;
+       default:
+               break;
+       }
+       hammer2_chain_drop(copy);
+}
+
 /*
  * Create a snapshot of the specified {parent, ochain} with the specified
  * label.  The originating hammer2_inode must be exclusively locked for
diff --git a/sys/vfs/hammer2/hammer2_disk.h b/sys/vfs/hammer2/hammer2_disk.h
index 7c47953..29cccfa 100644
@@ -1249,6 +1249,7 @@ typedef struct hammer2_volume_data hammer2_volume_data_t;
 union hammer2_media_data {
        hammer2_volume_data_t   voldata;
         hammer2_inode_data_t    ipdata;
+       hammer2_blockset_t      blkset;
        hammer2_blockref_t      npdata[HAMMER2_IND_COUNT_MAX];
        hammer2_bmap_data_t     bmdata[HAMMER2_FREEMAP_COUNT];
        char                    buf[HAMMER2_PBUFSIZE];
diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c
index 8e69ebe..6e6790c 100644
@@ -1047,7 +1047,7 @@ hammer2_inode_xop_flush(hammer2_xop_t *arg, int clindex)
        /*
         * Flush core chains
         */
-       chain = hammer2_inode_chain(xop->head.ip, clindex,
+       chain = hammer2_inode_chain(xop->head.ip1, clindex,
                                    HAMMER2_RESOLVE_ALWAYS);
        if (chain) {
                hmp = chain->hmp;
@@ -1069,7 +1069,7 @@ hammer2_inode_xop_flush(hammer2_xop_t *arg, int clindex)
         * flush each hammer2_dev (hmp) once.
         */
        for (j = clindex - 1; j >= 0; --j) {
-               if ((chain = xop->head.ip->cluster.array[j].chain) != NULL) {
+               if ((chain = xop->head.ip1->cluster.array[j].chain) != NULL) {
                        if (chain->hmp == hmp) {
                                chain = NULL;   /* safety */
                                goto skip;
diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c
index a05e7df..075e1b7 100644
@@ -274,6 +274,10 @@ void
 hammer2_inode_ref(hammer2_inode_t *ip)
 {
        atomic_add_int(&ip->refs, 1);
+       if (hammer2_debug & 0x80000) {
+               kprintf("INODE+1 %p (%d->%d)\n", ip, ip->refs - 1, ip->refs);
+               print_backtrace(8);
+       }
 }
 
 /*
@@ -288,6 +292,11 @@ hammer2_inode_drop(hammer2_inode_t *ip)
        u_int refs;
 
        while (ip) {
+               if (hammer2_debug & 0x80000) {
+                       kprintf("INODE-1 %p (%d->%d)\n",
+                               ip, ip->refs, ip->refs - 1);
+                       print_backtrace(8);
+               }
                refs = ip->refs;
                cpu_ccfence();
                if (refs == 1) {
@@ -1401,7 +1410,7 @@ hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex)
                xop->lhc, clindex);
 
        chain = NULL;
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
                error = EIO;
@@ -1417,7 +1426,7 @@ hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex)
        }
 
        error = hammer2_chain_create(&parent, &chain,
-                                    xop->head.ip->pmp,
+                                    xop->head.ip1->pmp,
                                     xop->lhc, 0,
                                     HAMMER2_BREF_TYPE_INODE,
                                     HAMMER2_INODE_BYTES,
@@ -1425,11 +1434,11 @@ hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex)
        if (error == 0) {
                hammer2_chain_modify(chain, xop->head.mtid, 0);
                chain->data->ipdata.meta = xop->meta;
-               if (xop->head.name) {
-                       bcopy(xop->head.name,
+               if (xop->head.name1) {
+                       bcopy(xop->head.name1,
                              chain->data->ipdata.filename,
-                             xop->head.name_len);
-                       chain->data->ipdata.meta.name_len = xop->head.name_len;
+                             xop->head.name1_len);
+                       chain->data->ipdata.meta.name_len = xop->head.name1_len;
                }
                chain->data->ipdata.meta.name_key = xop->lhc;
        }
@@ -1464,7 +1473,7 @@ hammer2_inode_xop_destroy(hammer2_xop_t *arg, int clindex)
        /*
         * We need the precise parent chain to issue the deletion.
         */
-       ip = xop->head.ip;
+       ip = xop->head.ip1;
        pmp = ip->pmp;
        chain = NULL;
 
@@ -1506,7 +1515,7 @@ hammer2_inode_xop_unlinkall(hammer2_xop_t *arg, int clindex)
        /*
         * We need the precise parent chain to issue the deletion.
         */
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        chain = NULL;
        if (parent == NULL) {
@@ -1558,8 +1567,8 @@ hammer2_inode_xop_connect(hammer2_xop_t *arg, int clindex)
         * Get directory, then issue a lookup to prime the parent chain
         * for the create.  The lookup is expected to fail.
         */
-       pmp = xop->head.ip->pmp;
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       pmp = xop->head.ip1->pmp;
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
                chain = NULL;
@@ -1589,10 +1598,10 @@ hammer2_inode_xop_connect(hammer2_xop_t *arg, int clindex)
        wipdata = &chain->data->ipdata;
 
        hammer2_inode_modify(xop->head.ip2);
-       if (xop->head.name) {
+       if (xop->head.name1) {
                bzero(wipdata->filename, sizeof(wipdata->filename));
-               bcopy(xop->head.name, wipdata->filename, xop->head.name_len);
-               wipdata->meta.name_len = xop->head.name_len;
+               bcopy(xop->head.name1, wipdata->filename, xop->head.name1_len);
+               wipdata->meta.name_len = xop->head.name1_len;
        }
        wipdata->meta.name_key = xop->lhc;
 
@@ -1628,7 +1637,7 @@ hammer2_inode_xop_fsync(hammer2_xop_t *arg, int clindex)
        hammer2_chain_t *chain;
        int error;
 
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        chain = NULL;
        if (parent == NULL) {
diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c
index 3196016..a210f7a 100644
@@ -834,6 +834,13 @@ hammer2_ioctl_debug_dump(hammer2_inode_t *ip)
        return 0;
 }
 
+/*
+ * Executes one flush/free pass per call.  If trying to recover
+ * data we just freed up a moment ago it can take up to six passes
+ * to fully free the blocks.  Note that passes occur automatically based
+ * on free space as the storage fills up, but manual passes may be needed
+ * if storage becomes almost completely full.
+ */
 static
 int
 hammer2_ioctl_bulkfree_scan(hammer2_inode_t *ip, void *data)
@@ -846,7 +853,6 @@ hammer2_ioctl_bulkfree_scan(hammer2_inode_t *ip, void *data)
        if (hmp == NULL)
                return (EINVAL);
 
-       /* XXX run local cluster targets only */
        error = hammer2_bulkfree_pass(hmp, bfi);
 
        return error;
diff --git a/sys/vfs/hammer2/hammer2_strategy.c b/sys/vfs/hammer2/hammer2_strategy.c
index e1e2ecb..741aacd 100644
@@ -253,6 +253,7 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
        xop->finished = 0;
        xop->bio = bio;
        xop->lbase = lbase;
+       hammer2_mtx_init(&xop->lock, "h2bio");
        hammer2_xop_start(&xop->head, hammer2_strategy_xop_read);
 
        return(0);
@@ -281,7 +282,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
        bio = xop->bio;
        bp = bio->bio_buf;
 
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        if (parent) {
@@ -310,9 +311,9 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
         */
        if (xop->finished)
                return;
-       hammer2_mtx_ex(&xop->head.xgrp->mtx2);
+       hammer2_mtx_ex(&xop->lock);
        if (xop->finished) {
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                return;
        }
 
@@ -326,7 +327,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
        switch(error) {
        case 0:
                xop->finished = 1;
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                chain = xop->head.cluster.focus;
                hammer2_strategy_read_completion(chain, (char *)chain->data,
                                                 xop->bio);
@@ -335,7 +336,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
                break;
        case ENOENT:
                xop->finished = 1;
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                bp->b_resid = 0;
                bp->b_error = 0;
                bzero(bp->b_data, bp->b_bcount);
@@ -343,11 +344,11 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                break;
        case EINPROGRESS:
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                break;
        default:
                xop->finished = 1;
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                bp->b_flags |= B_ERROR;
                bp->b_error = EIO;
                biodone(bio);
@@ -490,7 +491,7 @@ hammer2_strategy_xop_write(hammer2_xop_t *arg, int clindex)
        lbase = xop->lbase;
        bio = xop->bio;
        bp = bio->bio_buf;
-       ip = xop->head.ip;
+       ip = xop->head.ip1;
 
        /* hammer2_trans_init(parent->hmp->spmp, HAMMER2_TRANS_BUFCACHE); */
 
@@ -512,9 +513,9 @@ hammer2_strategy_xop_write(hammer2_xop_t *arg, int clindex)
         */
        if (xop->finished)
                return;
-       hammer2_mtx_ex(&xop->head.xgrp->mtx2);
+       hammer2_mtx_ex(&xop->lock);
        if (xop->finished) {
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                return;
        }
 
@@ -529,7 +530,7 @@ hammer2_strategy_xop_write(hammer2_xop_t *arg, int clindex)
        case ENOENT:
        case 0:
                xop->finished = 1;
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                bp->b_resid = 0;
                bp->b_error = 0;
                biodone(bio);
@@ -537,11 +538,11 @@ hammer2_strategy_xop_write(hammer2_xop_t *arg, int clindex)
                hammer2_lwinprog_drop(ip->pmp);
                break;
        case EINPROGRESS:
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                break;
        default:
                xop->finished = 1;
-               hammer2_mtx_unlock(&xop->head.xgrp->mtx2);
+               hammer2_mtx_unlock(&xop->lock);
                bp->b_flags |= B_ERROR;
                bp->b_error = EIO;
                biodone(bio);
diff --git a/sys/vfs/hammer2/hammer2_thread.c b/sys/vfs/hammer2/hammer2_thread.c
index 1d0a110..e1c346c 100644
@@ -84,9 +84,9 @@ hammer2_thr_create(hammer2_thread_t *thr, hammer2_pfs_t *pmp,
 {
        lockinit(&thr->lk, "h2thr", 0, 0);
        thr->pmp = pmp;
+       thr->xopq = &pmp->xopq[clindex];
        thr->clindex = clindex;
        thr->repidx = repidx;
-       TAILQ_INIT(&thr->xopq);
        if (repidx >= 0) {
                lwkt_create(func, thr, &thr->td, NULL, 0, -1,
                            "%s-%s.%02d", id, pmp->pfs_names[clindex], repidx);
@@ -110,12 +110,13 @@ hammer2_thr_delete(hammer2_thread_t *thr)
                return;
        lockmgr(&thr->lk, LK_EXCLUSIVE);
        atomic_set_int(&thr->flags, HAMMER2_THREAD_STOP);
-       wakeup(&thr->flags);
+       wakeup(thr->xopq);
        while (thr->td) {
                lksleep(thr, &thr->lk, 0, "h2thr", hz);
        }
        lockmgr(&thr->lk, LK_RELEASE);
        thr->pmp = NULL;
+       thr->xopq = NULL;
        lockuninit(&thr->lk);
 }
 
@@ -131,7 +132,7 @@ hammer2_thr_remaster(hammer2_thread_t *thr)
                return;
        lockmgr(&thr->lk, LK_EXCLUSIVE);
        atomic_set_int(&thr->flags, HAMMER2_THREAD_REMASTER);
-       wakeup(&thr->flags);
+       wakeup(thr->xopq);
        lockmgr(&thr->lk, LK_RELEASE);
 }
 
@@ -139,7 +140,7 @@ void
 hammer2_thr_freeze_async(hammer2_thread_t *thr)
 {
        atomic_set_int(&thr->flags, HAMMER2_THREAD_FREEZE);
-       wakeup(&thr->flags);
+       wakeup(thr->xopq);
 }
 
 void
@@ -149,7 +150,7 @@ hammer2_thr_freeze(hammer2_thread_t *thr)
                return;
        lockmgr(&thr->lk, LK_EXCLUSIVE);
        atomic_set_int(&thr->flags, HAMMER2_THREAD_FREEZE);
-       wakeup(&thr->flags);
+       wakeup(thr->xopq);
        while ((thr->flags & HAMMER2_THREAD_FROZEN) == 0) {
                lksleep(thr, &thr->lk, 0, "h2frz", hz);
        }
@@ -163,7 +164,7 @@ hammer2_thr_unfreeze(hammer2_thread_t *thr)
                return;
        lockmgr(&thr->lk, LK_EXCLUSIVE);
        atomic_clear_int(&thr->flags, HAMMER2_THREAD_FROZEN);
-       wakeup(&thr->flags);
+       wakeup(thr->xopq);
        lockmgr(&thr->lk, LK_RELEASE);
 }
 
@@ -218,7 +219,7 @@ hammer2_primary_sync_thread(void *arg)
                 * Force idle if frozen until unfrozen or stopped.
                 */
                if (thr->flags & HAMMER2_THREAD_FROZEN) {
-                       lksleep(&thr->flags, &thr->lk, 0, "frozen", 0);
+                       lksleep(thr->xopq, &thr->lk, 0, "frozen", 0);
                        continue;
                }
 
@@ -299,7 +300,7 @@ hammer2_primary_sync_thread(void *arg)
                /*
                 * Wait for event, or 5-second poll.
                 */
-               lksleep(&thr->flags, &thr->lk, 0, "h2idle", hz * 5);
+               lksleep(thr->xopq, &thr->lk, 0, "h2idle", hz * 5);
        }
        thr->td = NULL;
        wakeup(thr);
@@ -950,8 +951,7 @@ hammer2_sync_replace(hammer2_thread_t *thr,
 void
 hammer2_xop_group_init(hammer2_pfs_t *pmp, hammer2_xop_group_t *xgrp)
 {
-       hammer2_mtx_init(&xgrp->mtx, "h2xopq");
-       hammer2_mtx_init(&xgrp->mtx2, "h2xopio");
+       /* no extra fields in structure at the moment */
 }
 
 /*
@@ -969,7 +969,7 @@ hammer2_xop_alloc(hammer2_inode_t *ip, int flags)
 
        xop = objcache_get(cache_xops, M_WAITOK);
        KKASSERT(xop->head.cluster.array[0].chain == NULL);
-       xop->head.ip = ip;
+       xop->head.ip1 = ip;
        xop->head.func = NULL;
        xop->head.state = 0;
        xop->head.error = 0;
@@ -996,9 +996,9 @@ hammer2_xop_alloc(hammer2_inode_t *ip, int flags)
 void
 hammer2_xop_setname(hammer2_xop_head_t *xop, const char *name, size_t name_len)
 {
-       xop->name = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
-       xop->name_len = name_len;
-       bcopy(name, xop->name, name_len);
+       xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
+       xop->name1_len = name_len;
+       bcopy(name, xop->name1, name_len);
 }
 
 void
@@ -1081,26 +1081,56 @@ void
 hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
                         int notidx)
 {
+#if 0
        hammer2_xop_group_t *xgrp;
        hammer2_thread_t *thr;
+#endif
        hammer2_pfs_t *pmp;
+#if 0
        int g;
+#endif
        int i;
 
-       pmp = xop->ip->pmp;
+       pmp = xop->ip1->pmp;
        if (pmp->has_xop_threads == 0)
                hammer2_xop_helper_create(pmp);
 
+#if 0
        g = pmp->xop_iterator++;
        g = g & HAMMER2_XOPGROUPS_MASK;
        xgrp = &pmp->xop_groups[g];
-       xop->func = func;
        xop->xgrp = xgrp;
+#endif
+       xop->func = func;
 
-       /* XXX do cluster_resolve or cluster_check here, only start
-        * synchronized elements */
+       /*
+        * The XOP sequencer is based on ip1, ip2, and ip3.  Because ops can
+        * finish early and unlock the related inodes, some targets may get
+        * behind.  The sequencer ensures that ops on the same inode execute
+        * in the same order.
+        */
+       hammer2_spin_ex(&pmp->xop_spin);
+       for (i = 0; i < xop->ip1->cluster.nchains; ++i) {
+               if (i != notidx) {
+                       atomic_set_int(&xop->run_mask, 1U << i);
+                       atomic_set_int(&xop->chk_mask, 1U << i);
+                       TAILQ_INSERT_TAIL(&pmp->xopq[i], xop, collect[i].entry);
+               }
+       }
+       hammer2_spin_unex(&pmp->xop_spin);
 
-       for (i = 0; i < xop->ip->cluster.nchains; ++i) {
+       /*
+        * Try to wakeup just one xop thread for each cluster node.
+        */
+       for (i = 0; i < xop->ip1->cluster.nchains; ++i) {
+               if (i != notidx)
+                       wakeup_one(&pmp->xopq[i]);
+       }
+#if 0
+       /*
+        * Dispatch to concurrent threads.
+        */
+       for (i = 0; i < xop->ip1->cluster.nchains; ++i) {
                thr = &xgrp->thrs[i];
                if (thr->td && i != notidx) {
                        lockmgr(&thr->lk, LK_EXCLUSIVE);
@@ -1115,6 +1145,7 @@ hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
                        wakeup(&thr->flags);
                }
        }
+#endif
 }
 
 void
@@ -1129,12 +1160,9 @@ hammer2_xop_start(hammer2_xop_head_t *xop, hammer2_xop_func_t func)
 void
 hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
 {
-       hammer2_xop_group_t *xgrp;
        hammer2_chain_t *chain;
        int i;
 
-       xgrp = xop->xgrp;
-
        /*
         * Remove the frontend or remove a backend feeder.  When removing
         * the frontend we must wakeup any backend feeders who are waiting
@@ -1184,9 +1212,9 @@ hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
        /*
         * The inode is only held at this point, simply drop it.
         */
-       if (xop->ip) {
-               hammer2_inode_drop(xop->ip);
-               xop->ip = NULL;
+       if (xop->ip1) {
+               hammer2_inode_drop(xop->ip1);
+               xop->ip1 = NULL;
        }
        if (xop->ip2) {
                hammer2_inode_drop(xop->ip2);
@@ -1196,10 +1224,10 @@ hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
                hammer2_inode_drop(xop->ip3);
                xop->ip3 = NULL;
        }
-       if (xop->name) {
-               kfree(xop->name, M_HAMMER2);
-               xop->name = NULL;
-               xop->name_len = 0;
+       if (xop->name1) {
+               kfree(xop->name1, M_HAMMER2);
+               xop->name1 = NULL;
+               xop->name1_len = 0;
        }
        if (xop->name2) {
                kfree(xop->name2, M_HAMMER2);
@@ -1447,6 +1475,115 @@ done:
        return error;
 }
 
+/*
+ * N x M processing threads are available to handle XOPs, N per cluster
+ * index x M cluster nodes.  All the threads for any given cluster index
+ * share and pull from the same xopq.
+ *
+ * Locate and return the next runnable xop, or NULL if no xops are
+ * present or none of the xops are currently runnable (for various reasons).
+ * The xop is left on the queue and serves to block other dependent xops
+ * from being run.
+ *
+ * Dependent xops will not be returned.
+ *
+ * Sets HAMMER2_XOP_FIFO_RUN on the returned xop or returns NULL.
+ *
+ * NOTE! Xops run concurrently for each cluster index.
+ */
+#define XOP_HASH_SIZE  16
+#define XOP_HASH_MASK  (XOP_HASH_SIZE - 1)
+
+static __inline
+int
+xop_testhash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
+{
+       uint32_t mask;
+       int hv;
+
+       hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
+       mask = 1U << (hv & 31);
+       hv >>= 5;
+
+       return ((int)(hash[hv & XOP_HASH_MASK] & mask));
+}
+
+static __inline
+void
+xop_sethash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
+{
+       uint32_t mask;
+       int hv;
+
+       hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
+       mask = 1U << (hv & 31);
+       hv >>= 5;
+
+       hash[hv & XOP_HASH_MASK] |= mask;
+}
+
+static
+hammer2_xop_head_t *
+hammer2_xop_next(hammer2_thread_t *thr)
+{
+       hammer2_pfs_t *pmp = thr->pmp;
+       int clindex = thr->clindex;
+       uint32_t hash[XOP_HASH_SIZE] = { 0 };
+       hammer2_xop_head_t *xop;
+
+       hammer2_spin_ex(&pmp->xop_spin);
+       TAILQ_FOREACH(xop, thr->xopq, collect[clindex].entry) {
+               /*
+                * Check dependency
+                */
+               if (xop_testhash(thr, xop->ip1, hash) ||
+                   (xop->ip2 && xop_testhash(thr, xop->ip2, hash)) ||
+                   (xop->ip3 && xop_testhash(thr, xop->ip3, hash))) {
+                       continue;
+               }
+               xop_sethash(thr, xop->ip1, hash);
+               if (xop->ip2)
+                       xop_sethash(thr, xop->ip2, hash);
+               if (xop->ip3)
+                       xop_sethash(thr, xop->ip3, hash);
+
+               /*
+                * Check already running
+                */
+               if (xop->collect[clindex].flags & HAMMER2_XOP_FIFO_RUN)
+                       continue;
+
+               /*
+                * Found a good one, return it.
+                */
+               atomic_set_int(&xop->collect[clindex].flags,
+                              HAMMER2_XOP_FIFO_RUN);
+               break;
+       }
+       hammer2_spin_unex(&pmp->xop_spin);
+
+       return xop;
+}
+
+/*
+ * Remove the completed XOP from the queue, clear HAMMER2_XOP_FIFO_RUN.
+ *
+ * NOTE! Xops run concurrently for each cluster index.
+ */
+static
+void
+hammer2_xop_dequeue(hammer2_thread_t *thr, hammer2_xop_head_t *xop)
+{
+       hammer2_pfs_t *pmp = thr->pmp;
+       int clindex = thr->clindex;
+
+       hammer2_spin_ex(&pmp->xop_spin);
+       TAILQ_REMOVE(thr->xopq, xop, collect[clindex].entry);
+       atomic_clear_int(&xop->collect[clindex].flags,
+                        HAMMER2_XOP_FIFO_RUN);
+       hammer2_spin_unex(&pmp->xop_spin);
+}
+
 /*
  * Primary management thread for xops support.  Each node has several such
  * threads which replicate front-end operations on cluster nodes.
@@ -1462,11 +1599,10 @@ hammer2_primary_xops_thread(void *arg)
        hammer2_thread_t *thr = arg;
        hammer2_pfs_t *pmp;
        hammer2_xop_head_t *xop;
-       hammer2_xop_group_t *xgrp;
        uint32_t mask;
 
        pmp = thr->pmp;
-       xgrp = &pmp->xop_groups[thr->repidx];
+       /*xgrp = &pmp->xop_groups[thr->repidx]; not needed */
        mask = 1U << thr->clindex;
 
        lockmgr(&thr->lk, LK_EXCLUSIVE);
@@ -1483,7 +1619,7 @@ hammer2_primary_xops_thread(void *arg)
                 * Force idle if frozen until unfrozen or stopped.
                 */
                if (thr->flags & HAMMER2_THREAD_FROZEN) {
-                       lksleep(&thr->flags, &thr->lk, 0, "frozen", 0);
+                       lksleep(thr->xopq, &thr->lk, 0, "frozen", 0);
                        continue;
                }
 
@@ -1503,27 +1639,33 @@ hammer2_primary_xops_thread(void *arg)
                 * may also abort processing if the frontend VOP becomes
                 * inactive.
                 */
-               while ((xop = TAILQ_FIRST(&thr->xopq)) != NULL) {
-                       TAILQ_REMOVE(&thr->xopq, xop,
-                                    collect[thr->clindex].entry);
+               tsleep_interlock(thr->xopq, 0);
+               while ((xop = hammer2_xop_next(thr)) != NULL) {
                        if (hammer2_xop_active(xop)) {
                                lockmgr(&thr->lk, LK_RELEASE);
                                xop->func((hammer2_xop_t *)xop, thr->clindex);
+                               hammer2_xop_dequeue(thr, xop);
                                hammer2_xop_retire(xop, mask);
                                lockmgr(&thr->lk, LK_EXCLUSIVE);
                        } else {
                                hammer2_xop_feed(xop, NULL, thr->clindex,
                                                 ECONNABORTED);
+                               hammer2_xop_dequeue(thr, xop);
                                hammer2_xop_retire(xop, mask);
                        }
                }
 
                /*
-                * Wait for event.
+                * Wait for event.  The xopq is not interlocked by thr->lk,
+                * use the tsleep interlock sequence.
+                *
+                * For robustness poll on a 30-second interval, but nominally
+                * expect to be woken up.
                 */
-               lksleep(&thr->flags, &thr->lk, 0, "h2idle", 0);
+               lksleep(thr->xopq, &thr->lk, PINTERLOCKED, "h2idle", hz*30);
        }
 
+#if 0
        /*
         * Cleanup / termination
         */
@@ -1533,6 +1675,7 @@ hammer2_primary_xops_thread(void *arg)
                             collect[thr->clindex].entry);
                hammer2_xop_retire(xop, mask);
        }
+#endif
 
        thr->td = NULL;
        wakeup(thr);
diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c
index 5b8349f..478a6d8 100644
@@ -346,10 +346,16 @@ hammer2_pfsalloc(hammer2_chain_t *chain, const hammer2_inode_data_t *ripdata,
                kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
                lockinit(&pmp->lock, "pfslk", 0, 0);
                spin_init(&pmp->inum_spin, "hm2pfsalloc_inum");
+               spin_init(&pmp->xop_spin, "h2xop");
                RB_INIT(&pmp->inum_tree);
                TAILQ_INIT(&pmp->unlinkq);
                spin_init(&pmp->list_spin, "hm2pfsalloc_list");
 
+               /*
+                * Distribute backend operations to threads
+                */
+               for (j = 0; j < HAMMER2_MAXCLUSTER; ++j)
+                       TAILQ_INIT(&pmp->xopq[j]);
                for (j = 0; j < HAMMER2_XOPGROUPS; ++j)
                        hammer2_xop_group_init(pmp, &pmp->xop_groups[j]);
 
@@ -895,6 +901,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                TAILQ_INIT(&hmp->flushq);
 
                lockinit(&hmp->vollk, "h2vol", 0, 0);
+               lockinit(&hmp->bulklk, "h2bulk", 0, 0);
 
                /*
                 * vchain setup. vchain.data is embedded.
@@ -1990,6 +1997,7 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
                hammer2_xop_start(&xop->head, hammer2_inode_xop_flush);
                error = hammer2_xop_collect(&xop->head,
                                            HAMMER2_XOP_COLLECT_WAITALL);
+               hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                if (error == ENOENT)
                        error = 0;
        } else {
diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c
index 1a10552..5838dd6 100644
@@ -292,10 +292,18 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = 0;
-       for (i = 0; i < ip->cluster.nchains; ++i) {
-               if ((chain = ip->cluster.array[i].chain) != NULL) {
-                       if (vap->va_bytes < chain->bref.data_count)
-                               vap->va_bytes = chain->bref.data_count;
+       if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
+               /*
+                * Can't really calculate directory use sans the files under
+                * it, just assume one block for now.
+                */
+               vap->va_bytes += HAMMER2_INODE_BYTES;
+       } else {
+               for (i = 0; i < ip->cluster.nchains; ++i) {
+                       if ((chain = ip->cluster.array[i].chain) != NULL) {
+                               if (vap->va_bytes < chain->bref.data_count)
+                                       vap->va_bytes = chain->bref.data_count;
+                       }
                }
        }
        vap->va_type = hammer2_get_vtype(ip->meta.type);
diff --git a/sys/vfs/hammer2/hammer2_xops.c b/sys/vfs/hammer2/hammer2_xops.c
index fce37f2..0d32209 100644
@@ -73,7 +73,7 @@ hammer2_xop_ipcluster(hammer2_xop_t *arg, int clindex)
        hammer2_chain_t *chain;
        int error;
 
-       chain = hammer2_inode_chain(xop->head.ip, clindex,
+       chain = hammer2_inode_chain(xop->head.ip1, clindex,
                                    HAMMER2_RESOLVE_ALWAYS |
                                    HAMMER2_RESOLVE_SHARED);
        if (chain)
@@ -108,7 +108,7 @@ hammer2_xop_readdir(hammer2_xop_t *arg, int clindex)
         * The inode's chain is the iterator.  If we cannot acquire it our
         * contribution ends here.
         */
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        if (parent == NULL) {
@@ -163,7 +163,7 @@ hammer2_xop_nresolve(hammer2_xop_t *arg, int clindex)
        int cache_index = -1;   /* XXX */
        int error;
 
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        if (parent == NULL) {
@@ -172,8 +172,8 @@ hammer2_xop_nresolve(hammer2_xop_t *arg, int clindex)
                error = EIO;
                goto done;
        }
-       name = xop->head.name;
-       name_len = xop->head.name_len;
+       name = xop->head.name1;
+       name_len = xop->head.name1_len;
 
        /*
         * Lookup the directory entry
@@ -206,7 +206,7 @@ hammer2_xop_nresolve(hammer2_xop_t *arg, int clindex)
        if (chain) {
                if (chain->data->ipdata.meta.type == HAMMER2_OBJTYPE_HARDLINK) {
                        error = hammer2_chain_hardlink_find(
-                                               xop->head.ip,
+                                               xop->head.ip1,
                                                &parent, &chain,
                                                HAMMER2_RESOLVE_SHARED);
                }
@@ -252,7 +252,7 @@ hammer2_xop_unlink(hammer2_xop_t *arg, int clindex)
        /*
         * Requires exclusive lock
         */
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS);
        if (parent == NULL) {
                kprintf("xop_nresolve: NULL parent\n");
@@ -260,8 +260,8 @@ hammer2_xop_unlink(hammer2_xop_t *arg, int clindex)
                error = EIO;
                goto done;
        }
-       name = xop->head.name;
-       name_len = xop->head.name_len;
+       name = xop->head.name1;
+       name_len = xop->head.name1_len;
 
        /*
         * Lookup the directory entry
@@ -329,7 +329,7 @@ hammer2_xop_unlink(hammer2_xop_t *arg, int clindex)
        if (chain) {
                if (chain->data->ipdata.meta.type == HAMMER2_OBJTYPE_HARDLINK) {
                        error = hammer2_chain_hardlink_find(
-                                               xop->head.ip,
+                                               xop->head.ip1,
                                                &parent, &chain,
                                                0);
                        if (chain &&
@@ -543,7 +543,7 @@ hammer2_xop_nrename(hammer2_xop_t *arg, int clindex)
                }
        } else {
                /*
-                * head.ip is fdip, do a namespace search.
+                * head.ip1 is fdip, do a namespace search.
                 */
                const hammer2_inode_data_t *ripdata;
                hammer2_key_t lhc;
@@ -551,7 +551,7 @@ hammer2_xop_nrename(hammer2_xop_t *arg, int clindex)
                const char *name;
                size_t name_len;
 
-               parent = hammer2_inode_chain(xop->head.ip, clindex,
+               parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                             HAMMER2_RESOLVE_ALWAYS |
                                             HAMMER2_RESOLVE_SHARED);
                if (parent == NULL) {
@@ -559,8 +559,8 @@ hammer2_xop_nrename(hammer2_xop_t *arg, int clindex)
                        error = EIO;
                        goto done;
                }
-               name = xop->head.name;
-               name_len = xop->head.name_len;
+               name = xop->head.name1;
+               name_len = xop->head.name1_len;
 
                /*
                 * Lookup the directory entry
@@ -608,8 +608,8 @@ hammer2_xop_nrename(hammer2_xop_t *arg, int clindex)
         * XXX in-inode parent directory specification?
         */
        if (chain->data->ipdata.meta.name_key != xop->lhc ||
-           xop->head.name_len != xop->head.name2_len ||
-           bcmp(xop->head.name, xop->head.name2, xop->head.name_len) != 0) {
+           xop->head.name1_len != xop->head.name2_len ||
+           bcmp(xop->head.name1, xop->head.name2, xop->head.name1_len) != 0) {
                hammer2_inode_data_t *wipdata;
 
                hammer2_chain_modify(chain, xop->head.mtid, 0);
@@ -675,7 +675,7 @@ hammer2_xop_scanlhc(hammer2_xop_t *arg, int clindex)
        int cache_index = -1;   /* XXX */
        int error = 0;
 
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        if (parent == NULL) {
@@ -734,7 +734,7 @@ hammer2_xop_lookup(hammer2_xop_t *arg, int clindex)
        int cache_index = -1;   /* XXX */
        int error = 0;
 
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        chain = NULL;
@@ -785,7 +785,7 @@ hammer2_xop_scanall(hammer2_xop_t *arg, int clindex)
         * The inode's chain is the iterator.  If we cannot acquire it our
         * contribution ends here.
         */
-       parent = hammer2_inode_chain(xop->head.ip, clindex,
+       parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
        if (parent == NULL) {