hammer2 - Add xxhash to H2 and throw in debug stuff for performance testing.
authorMatthew Dillon <dillon@apollo.backplane.com>
Wed, 8 Jun 2016 23:06:51 +0000 (16:06 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Wed, 8 Jun 2016 23:06:51 +0000 (16:06 -0700)
* Add xxhash.  This is a high-speed non-cryptographic hash
  algorithm.  Sam pointed me at the site; the code is available on
  github and is BSD licensed:

      git://github.com/Cyan4973/xxHash.git

  This hash has good distribution and is very fast.

* Change HAMMER2 to default to using xxhash64 instead of iscsi_crc32().
  xxhash can process data at several GBytes/sec whereas even the
  multi-table iscsi_crc32() can only do around 500 MBytes/sec, which
  is too slow for modern storage subsystems (NVMe can nominally do
  1.5-2.5 GBytes/sec, and high-end cards can do 5 GBytes/sec).

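  As a reference, here is a minimal userland sketch of the new check-code
  computation (the helper name is mine; XXH64() and XXH_HAMMER2_SEED come
  from the patch below, and the kernel build compiles xxhash.c with
  -DXXH_NAMESPACE=h2_, so the actual symbol is h2_XXH64):

      #include <stdint.h>
      #include <stddef.h>
      #include "xxhash/xxhash.h"

      #define XXH_HAMMER2_SEED        0x4d617474446c6c6eLLU

      /* What hammer2_chain_setcheck() now computes for
       * HAMMER2_CHECK_XXHASH64 blockrefs.
       */
      static uint64_t
      check_xxhash64(const void *data, size_t bytes)
      {
              return (XXH64(data, bytes, XXH_HAMMER2_SEED));
      }
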
* There are five major paths that eat tons of CPU in H2:

  - The XIO path does a ton of allocation/deallocation and synchronous
    messaging.  This has not yet been fixed.

  - The check code (when it was iscsi_crc32()) slowed everything down.
    This is fixed, the default check code is now xxhash64.

  - The check code was being recomputed over and over again for the same
    cached buffer because the hammer2_chain_t structure is thrown away.

    Currently a hack involving a mask stored in the underlying DIO is
    used to indicate that the check code was previously validated (see
    the sketch after this list).  This is strictly temporary.  The real
    mask will have to be stored in the device buffer-cache buffer, with
    a second one in the chain structure.

    The chain structure must also be made persistent (not yet done).

  - The DEDUP code was also calling iscsi_crc32() redundantly (at least for
    reads).

    The read path has been fixed.  The write path is doable but requires more
    coding (not yet fixed).

  - The logical file cluster_read() in the kernel was not doing any
    read-ahead because H2 does not implement BMAP, creating long
    synchronous latencies.

    The kernel code for cluster_read() and cluster_readcb() has been
    fixed to do read-ahead whether or not a logical BMAP is implemented.
    H2 will now pipeline reads.

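The DIO check mask mentioned above works at 1KB granularity: each bit of
a 64-bit word covers 1KB of the 64KB device buffer.  A minimal sketch of
the computation, mirroring the new hammer2_io_mask() helper added below
(the numeric constants are assumed values of HAMMER2_OFF_MASK_RADIX and
HAMMER2_PBUFSIZE from hammer2_disk.h):

    #include <stdint.h>

    /* One bit per 1KB of the 64KB device buffer. */
    static uint64_t
    io_mask(uint64_t data_off, uint64_t pbase, unsigned int bytes)
    {
            int i;

            if (bytes < 1024)               /* smaller chunks unsupported */
                    return 0;
            /*
             * Chunk offset within the buffer, in 1KB units.  The low
             * bits of data_off encode the block radix (0x1F assumed
             * for HAMMER2_OFF_MASK_RADIX).
             */
            i = (int)(((data_off & ~(uint64_t)0x1F) - pbase) & 65535) >> 10;
            if (i == 0 && bytes == 65536)   /* whole buffer */
                    return (uint64_t)-1;
            return (((uint64_t)1 << (bytes >> 10)) - 1) << i;
    }

For example, a 16KB chain starting 32KB into its device buffer yields
mask 0x0000FFFF00000000.
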
Suggested-by: Samuel J. Greear <sjg@thesjg.com> (xxhash)
22 files changed:
sbin/hammer2/Makefile
sbin/hammer2/cmd_debug.c
sbin/hammer2/cmd_info.c
sbin/hammer2/cmd_setcheck.c
sbin/hammer2/hammer2.h
sbin/newfs_hammer2/Makefile
sbin/newfs_hammer2/newfs_hammer2.c
sys/vfs/hammer2/Makefile
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_disk.h
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_io.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_strategy.c
sys/vfs/hammer2/hammer2_thread.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c
sys/vfs/hammer2/hammer2_xxhash.h [new file with mode: 0644]
sys/vfs/hammer2/xxhash/xxhash.c [new file with mode: 0644]
sys/vfs/hammer2/xxhash/xxhash.h [new file with mode: 0644]

index 124b29e..029396b 100644 (file)
@@ -9,6 +9,7 @@ MAN=    hammer2.8
 #NOMAN=        TRUE
 DEBUG_FLAGS=-g
 
+CFLAGS+= -DXXH_NAMESPACE=h2_
 CFLAGS+= -I${.CURDIR}/../../sys
 CFLAGS+= -pthread
 LDADD= -ldmsg -lm -lutil -lmd -lcrypto
@@ -16,5 +17,7 @@ DPADD=        ${LIBDMSG} ${LIBM} ${LIBUTIL} ${LIBMD} ${LIBCRYPTO}
 
 #.PATH: ${.CURDIR}/../../sys/libkern
 #SRCS+= crc32.c
+.PATH: ${.CURDIR}/../../sys/vfs/hammer2/xxhash
+SRCS+= xxhash.c
 
 .include <bsd.prog.mk>
index 8ecddb0..4068630 100644 (file)
@@ -437,6 +437,7 @@ show_bref(int fd, int tab, int bi, hammer2_blockref_t *bref, int dofreemap)
        const char *type_str;
        char *str = NULL;
        uint32_t cv;
+       uint64_t cv64;
 
        switch(bref->type) {
        case HAMMER2_BREF_TYPE_EMPTY:
@@ -538,8 +539,16 @@ show_bref(int fd, int tab, int bi, hammer2_blockref_t *bref, int dofreemap)
                                printf("(meth %02x) ", bref->methods);
                        }
                        break;
-               case HAMMER2_CHECK_CRC64:
-                       printf("(meth %02x) ", bref->methods);
+               case HAMMER2_CHECK_XXHASH64:
+                       cv64 = XXH64(&media, bytes, XXH_HAMMER2_SEED);
+                       if (bref->check.xxhash64.value != cv64) {
+                               printf("(xxhash64 %02x:%016jx/%016jx) ",
+                                      bref->methods,
+                                      bref->check.xxhash64.value,
+                                      cv64);
+                       } else {
+                               printf("(meth %02x) ", bref->methods);
+                       }
                        break;
                case HAMMER2_CHECK_SHA192:
                        printf("(meth %02x) ", bref->methods);
index 8b1b98c..3c2f337 100644 (file)
@@ -257,6 +257,7 @@ h2pfs_check(int fd, hammer2_blockref_t *bref,
        int i;
        size_t bytes;
        uint32_t cv;
+       uint64_t cv64;
 
        bytes = (size_t)1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX);
 
@@ -314,7 +315,14 @@ h2pfs_check(int fd, hammer2_blockref_t *bref,
                                       cv);
                        }
                        break;
-               case HAMMER2_CHECK_CRC64:
+               case HAMMER2_CHECK_XXHASH64:
+                       cv64 = XXH64(&media, bytes, XXH_HAMMER2_SEED);
+                       if (bref->check.xxhash64.value != cv64) {
+                               printf("\t(xxhash failed %02x:%016jx/%016jx)\n",
+                                      bref->methods,
+                                      bref->check.xxhash64.value,
+                                      cv64);
+                       }
                        break;
                case HAMMER2_CHECK_SHA192:
                        break;
index 34a0472..561b950 100644 (file)
@@ -57,8 +57,8 @@ cmd_setcheck(const char *check_str, char **paths)
                                break;
                }
                if (check_algo < 0 && strcasecmp(check_str, "default") == 0) {
-                       check_algo = HAMMER2_CHECK_ISCSI32;
-                       check_str = "crc32";
+                       check_algo = HAMMER2_CHECK_XXHASH64;
+                       check_str = "xxhash64";
                }
                if (check_algo < 0 && strcasecmp(check_str, "disabled") == 0) {
                        check_algo = HAMMER2_CHECK_DISABLED;
index 71b6d8a..9a73bab 100644 (file)
@@ -62,6 +62,7 @@
 #include <vfs/hammer2/hammer2_disk.h>
 #include <vfs/hammer2/hammer2_mount.h>
 #include <vfs/hammer2/hammer2_ioctl.h>
+#include <vfs/hammer2/hammer2_xxhash.h>
 
 #include <stdio.h>
 #include <stdlib.h>
index 398b212..76c802f 100644 (file)
@@ -3,10 +3,11 @@
 PROG=  newfs_hammer2
 MAN=   newfs_hammer2.8
 CFLAGS+= -I${.CURDIR}/../../sys -I${.CURDIR}/../hammer2
+CFLAGS+= -DXXH_NAMESPACE=h2_
 SRCS= newfs_hammer2.c
 
-.PATH: ${.CURDIR}/../../sys/libkern
+.PATH: ${.CURDIR}/../../sys/libkern ${.CURDIR}/../../sys/vfs/hammer2/xxhash
 #.PATH: ${.CURDIR}/../../sys/vfs/hammer2
-SRCS+= icrc32.c
+SRCS+= icrc32.c xxhash.c
 
 .include <bsd.prog.mk>
index 10b4c81..e33cfec 100644 (file)
@@ -38,6 +38,7 @@
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
+#include <vfs/hammer2/hammer2_xxhash.h>
 #include <vfs/hammer2/hammer2_disk.h>
 
 #include <stdio.h>
@@ -589,12 +590,12 @@ format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space)
                        rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
                                                    HAMMER2_COMP_AUTOZERO);
                        rawip->meta.check_algo = HAMMER2_ENC_ALGO(
-                                                   HAMMER2_CHECK_ISCSI32);
+                                                   HAMMER2_CHECK_XXHASH64);
                } else  {
                        rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
                                                    HAMMER2_COMP_NEWFS_DEFAULT);
                        rawip->meta.check_algo = HAMMER2_ENC_ALGO(
-                                                   HAMMER2_CHECK_ISCSI32);
+                                                   HAMMER2_CHECK_XXHASH64);
                }
 
                /*
@@ -623,11 +624,11 @@ format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space)
                root_blockref[i].key = rawip->meta.name_key;
                root_blockref[i].copyid = HAMMER2_COPYID_LOCAL;
                root_blockref[i].keybits = 0;
-               root_blockref[i].check.iscsi32.value =
-                               hammer2_icrc32(rawip, sizeof(*rawip));
+               root_blockref[i].check.xxhash64.value =
+                               XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
                root_blockref[i].type = HAMMER2_BREF_TYPE_INODE;
                root_blockref[i].methods =
-                               HAMMER2_ENC_CHECK(HAMMER2_CHECK_ISCSI32) |
+                               HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
                                HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
                root_blockref[i].mirror_tid = 16;
                root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT;
@@ -657,7 +658,7 @@ format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space)
        rawip->meta.name_key = 0;
 
        rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
-       rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_ISCSI32);
+       rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64);
 
        /*
         * The super-root is flagged as a PFS and typically given its own
@@ -695,10 +696,10 @@ format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space)
         */
        sroot_blockref.copyid = HAMMER2_COPYID_LOCAL;
        sroot_blockref.keybits = 0;
-       sroot_blockref.check.iscsi32.value =
-                                       hammer2_icrc32(rawip, sizeof(*rawip));
+       sroot_blockref.check.xxhash64.value =
+                                       XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
        sroot_blockref.type = HAMMER2_BREF_TYPE_INODE;
-       sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_ISCSI32) |
+       sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
                                 HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO);
        sroot_blockref.mirror_tid = 16;
        rawip = NULL;
index 08ba801..0bb18b7 100644 (file)
@@ -1,9 +1,10 @@
 # Makefile for hammer2 vfs
 #
 #
-.PATH: ${.CURDIR} ${.CURDIR}/zlib
+.PATH: ${.CURDIR} ${.CURDIR}/zlib ${.CURDIR}/xxhash
 
 CFLAGS+= -DINVARIANTS -DSMP
+CFLAGS+= -DXXH_NAMESPACE=h2_
 KMOD=  hammer2
 SRCS=  hammer2_vfsops.c hammer2_vnops.c hammer2_xops.c hammer2_inode.c
 SRCS+= hammer2_strategy.c
@@ -15,5 +16,6 @@ SRCS+=  hammer2_zlib_adler32.c hammer2_zlib_deflate.c
 SRCS+=  hammer2_zlib_inffast.c hammer2_zlib_inflate.c
 SRCS+=  hammer2_zlib_inftrees.c hammer2_zlib_trees.c
 SRCS+=  hammer2_zlib_zutil.c
+SRCS+= xxhash.c
 
 .include <bsd.kmod.mk>
index def4d35..5114e2c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
+ * Copyright (c) 2011-2016 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to The DragonFly Project
  * by Matthew Dillon <dillon@dragonflybsd.org>
@@ -90,6 +90,7 @@
 #include <sys/mutex2.h>
 #include <sys/thread2.h>
 
+#include "hammer2_xxhash.h"
 #include "hammer2_disk.h"
 #include "hammer2_mount.h"
 #include "hammer2_ioctl.h"
@@ -294,6 +295,7 @@ struct hammer2_io {
        struct spinlock spin;
        struct hammer2_dev *hmp;
        struct buf      *bp;
+       uint64_t        crc_good_mask;
        off_t           pbase;
        int             psize;
        int             refs;
@@ -304,7 +306,7 @@ typedef struct hammer2_io hammer2_io_t;
 
 #define HAMMER2_DIO_INPROG     0x80000000      /* bio in progress */
 #define HAMMER2_DIO_GOOD       0x40000000      /* dio->bp is stable */
-#define HAMMER2_DIO_WAITING    0x20000000      /* (old) */
+#define HAMMER2_DIO_WAITING    0x20000000      /* wait for inprog clr */
 #define HAMMER2_DIO_DIRTY      0x10000000      /* flush on last drop */
 
 #define HAMMER2_DIO_MASK       0x0FFFFFFF
@@ -369,7 +371,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_CHAIN_INITIAL          0x00000020      /* initial create */
 #define HAMMER2_CHAIN_UPDATE           0x00000040      /* need parent update */
 #define HAMMER2_CHAIN_DEFERRED         0x00000080      /* flush depth defer */
-#define HAMMER2_CHAIN_UNUSED000001000  0x00000100
+#define HAMMER2_CHAIN_TESTEDGOOD       0x00000100      /* crc tested good */
 #define HAMMER2_CHAIN_ONFLUSH          0x00000200      /* on a flush list */
 #define HAMMER2_CHAIN_FICTITIOUS       0x00000400      /* unsuitable for I/O */
 #define HAMMER2_CHAIN_VOLUMESYNC       0x00000800      /* needs volume sync */
@@ -563,6 +565,8 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_XOPGROUPS_MASK (HAMMER2_XOPGROUPS - 1)
 #define HAMMER2_XOPMASK_VOP    0x80000000U
 
+#define HAMMER2_SPECTHREADS    1       /* sync */
+
 struct hammer2_cluster_item {
        hammer2_chain_t         *chain;
        int                     cache_index;
@@ -705,6 +709,7 @@ struct hammer2_inode {
        uint8_t                 comp_heuristic;
        hammer2_inode_meta_t    meta;           /* copy of meta-data */
        hammer2_off_t           osize;
+       hammer2_cluster_t       *cluster_cache;
 };
 
 typedef struct hammer2_inode hammer2_inode_t;
@@ -808,8 +813,9 @@ typedef struct hammer2_thread hammer2_thread_t;
  */
 struct hammer2_dedup {
        hammer2_off_t   data_off;
-       uint32_t        data_crc;
+       uint64_t        data_crc;
        uint32_t        ticks;
+       uint32_t        unused03;
 };
 
 typedef struct hammer2_dedup hammer2_dedup_t;
@@ -1147,7 +1153,8 @@ struct hammer2_pfs {
        int                     has_xop_threads;
        struct spinlock         xop_spin;       /* xop sequencer */
        hammer2_xop_group_t     xop_groups[HAMMER2_XOPGROUPS];
-       hammer2_xop_list_t      xopq[HAMMER2_MAXCLUSTER];
+       hammer2_xop_list_t      xopq[HAMMER2_MAXCLUSTER][HAMMER2_XOPGROUPS+
+                                                        HAMMER2_SPECTHREADS];
 };
 
 typedef struct hammer2_pfs hammer2_pfs_t;
@@ -1242,6 +1249,8 @@ extern int hammer2_hardlink_enable;
 extern int hammer2_flush_pipe;
 extern int hammer2_synchronous_flush;
 extern int hammer2_dio_count;
+extern long hammer2_chain_allocs;
+extern long hammer2_chain_frees;
 extern long hammer2_limit_dirty_chains;
 extern long hammer2_count_modified_chains;
 extern long hammer2_iod_file_read;
@@ -1268,6 +1277,9 @@ extern long hammer2_ioa_indr_write;
 extern long hammer2_ioa_fmap_write;
 extern long hammer2_ioa_volu_write;
 
+extern long hammer2_check_xxhash64;
+extern long hammer2_check_icrc32;
+
 extern struct objcache *cache_buffer_read;
 extern struct objcache *cache_buffer_write;
 extern struct objcache *cache_xops;
@@ -1481,9 +1493,12 @@ void hammer2_io_bdwrite(hammer2_io_t **diop);
 int hammer2_io_bwrite(hammer2_io_t **diop);
 int hammer2_io_isdirty(hammer2_io_t *dio);
 void hammer2_io_setdirty(hammer2_io_t *dio);
-void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes);
+void hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes);
 void hammer2_io_brelse(hammer2_io_t **diop);
 void hammer2_io_bqrelse(hammer2_io_t **diop);
+int hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp);
+void hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask);
+void hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask);
 
 /*
  * hammer2_thread.c
index d7ed30e..d1984dd 100644 (file)
@@ -88,6 +88,17 @@ static hammer2_chain_t *hammer2_combined_find(
  */
 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 
+extern int h2timer[32];
+extern int h2last;
+extern int h2lid;
+
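+/*
+ * Crude tick-granularity profiling for this commit's performance
+ * testing: TIMER(n) charges the ticks elapsed since the previous
+ * checkpoint to that checkpoint's bucket, then starts timing bucket n.
+ * The globals are defined in hammer2_strategy.c.
+ */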
+#define TIMER(which)   do {                            \
+       if (h2last)                                     \
+               h2timer[h2lid] += (int)(ticks - h2last);\
+       h2last = ticks;                                 \
+       h2lid = which;                                  \
+} while(0)
+
 int
 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
 {
@@ -172,6 +183,8 @@ hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
        hammer2_chain_t *chain;
        u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
 
+       atomic_add_long(&hammer2_chain_allocs, 1);
+
        /*
         * Construct the appropriate system structure.
         */
@@ -634,6 +647,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
        KKASSERT(chain->refs > 0);
        atomic_add_int(&chain->lockcnt, 1);
 
+       TIMER(20);
        /*
         * Get the appropriate lock.
         */
@@ -642,6 +656,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
        else
                hammer2_mtx_ex(&chain->lock);
        ++curthread->td_tracker;
+       TIMER(21);
 
        /*
         * If we already have a valid data pointer no further action is
@@ -649,6 +664,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
         */
        if (chain->data)
                return;
+       TIMER(22);
 
        /*
         * Do we have to resolve the data?
@@ -738,6 +754,7 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
         */
        if (chain->data)
                return;
+       TIMER(23);
 
        hmp = chain->hmp;
        KKASSERT(hmp != NULL);
@@ -767,6 +784,7 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
                        /* retry */
                }
        }
+       TIMER(24);
 
        /*
         * We own CHAIN_IOINPROG
@@ -803,6 +821,7 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
                                         &chain->dio);
                hammer2_adjreadcounter(&chain->bref, chain->bytes);
        }
+       TIMER(25);
        if (error) {
                chain->error = HAMMER2_ERROR_IO;
                kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
@@ -832,8 +851,13 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
                 * cache, which might not be true (need biodep on flush
                 * to calculate crc?  or simple crc?).
                 */
-       } else {
-               if (hammer2_chain_testcheck(chain, bdata) == 0) {
+       } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
+               uint64_t mask;
+
+       TIMER(26);
+               if (hammer2_io_crc_good(chain, &mask)) {
+                       chain->flags |= HAMMER2_CHAIN_TESTEDGOOD;
+               } else if (hammer2_chain_testcheck(chain, bdata) == 0) {
                        kprintf("chain %016jx.%02x meth=%02x "
                                "CHECK FAIL %08x (flags=%08x)\n",
                                chain->bref.data_off,
@@ -842,8 +866,17 @@ hammer2_chain_load_data(hammer2_chain_t *chain)
                                hammer2_icrc32(bdata, chain->bytes),
                                chain->flags);
                        chain->error = HAMMER2_ERROR_CHECK;
+               } else {
+                       hammer2_io_crc_setmask(chain->dio, mask);
+#if 0
+                       kprintf("chain %02x %016jx %u\n",
+                               chain->bref.type, chain->bref.key,
+                               chain->bytes);
+#endif
+                       chain->flags |= HAMMER2_CHAIN_TESTEDGOOD;
                }
        }
+       TIMER(27);
 
        /*
         * Setup the data pointer, either pointing it to an embedded data
@@ -896,6 +929,7 @@ done:
                        break;
                }
        }
+       TIMER(28);
 }
 
 /*
@@ -1807,6 +1841,7 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how)
  *       BREF_TYPE_DATA as the device buffer can alias the logical file
  *       buffer).
  */
+
 hammer2_chain_t *
 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
                     hammer2_key_t key_beg, hammer2_key_t key_end,
@@ -1827,6 +1862,8 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
        int generation;
        int maxloops = 300000;
 
+       TIMER(8);
+
        if (flags & HAMMER2_LOOKUP_ALWAYS) {
                how_maybe = how_always;
                how = HAMMER2_RESOLVE_ALWAYS;
@@ -1863,6 +1900,7 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
        }
 
 again:
+       TIMER(9);
        if (--maxloops == 0)
                panic("hammer2_chain_lookup: maxloops");
        /*
@@ -1944,6 +1982,7 @@ again:
                base = NULL;    /* safety */
                count = 0;      /* safety */
        }
+       TIMER(10);
 
        /*
         * Merged scan to find next candidate.
@@ -1957,6 +1996,8 @@ again:
        if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
                hammer2_chain_countbrefs(parent, base, count);
 
+       TIMER(11);
+
        /*
         * Combined search
         */
@@ -1967,10 +2008,13 @@ again:
                                      &bref);
        generation = parent->core.generation;
 
+       TIMER(12);
+
        /*
         * Exhausted parent chain, iterate.
         */
        if (bref == NULL) {
+               TIMER(13);
                hammer2_spin_unex(&parent->core.spin);
                if (key_beg == key_end) /* short cut single-key case */
                        return (NULL);
@@ -1999,6 +2043,7 @@ again:
         * Selected from blockref or in-memory chain.
         */
        if (chain == NULL) {
+               TIMER(14);
                bcopy = *bref;
                hammer2_spin_unex(&parent->core.spin);
                chain = hammer2_chain_get(parent, generation,
@@ -2013,10 +2058,12 @@ again:
                        goto again;
                }
        } else {
+               TIMER(15);
                hammer2_chain_ref(chain);
                hammer2_spin_unex(&parent->core.spin);
        }
 
+       TIMER(16);
        /*
         * chain is referenced but not locked.  We must lock the chain
         * to obtain definitive DUPLICATED/DELETED state
@@ -2027,6 +2074,7 @@ again:
        } else {
                hammer2_chain_lock(chain, how);
        }
+       TIMER(17);
 
        /*
         * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
@@ -2049,6 +2097,7 @@ again:
                        return(NULL);
                goto again;
        }
+       TIMER(18);
 
        /*
         * If the chain element is an indirect block it becomes the new
@@ -2073,6 +2122,7 @@ again:
                *parentp = parent = chain;
                goto again;
        }
+       TIMER(19);
 done:
        /*
         * All done, return the chain.
@@ -2086,6 +2136,7 @@ done:
                if (flags & HAMMER2_LOOKUP_NOLOCK)
                        hammer2_chain_unlock(chain);
        }
+       TIMER(20);
 
        return (chain);
 }
@@ -4161,9 +4212,9 @@ hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
                chain->bref.check.iscsi32.value =
                        hammer2_icrc32(bdata, chain->bytes);
                break;
-       case HAMMER2_CHECK_CRC64:
-               chain->bref.check.crc64.value = 0;
-               /* XXX */
+       case HAMMER2_CHECK_XXHASH64:
+               chain->bref.check.xxhash64.value =
+                       XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
                break;
        case HAMMER2_CHECK_SHA192:
                {
@@ -4211,10 +4262,12 @@ hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata)
        case HAMMER2_CHECK_ISCSI32:
                r = (chain->bref.check.iscsi32.value ==
                     hammer2_icrc32(bdata, chain->bytes));
+               hammer2_check_icrc32 += chain->bytes;
                break;
-       case HAMMER2_CHECK_CRC64:
-               r = (chain->bref.check.crc64.value == 0);
-               /* XXX */
+       case HAMMER2_CHECK_XXHASH64:
+               r = (chain->bref.check.xxhash64.value ==
+                    XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED));
+               hammer2_check_xxhash64 += chain->bytes;
                break;
        case HAMMER2_CHECK_SHA192:
                {
@@ -4393,6 +4446,8 @@ hammer2_chain_bulkdrop(hammer2_chain_t *copy)
                KKASSERT(copy->data);
                kfree(copy->data, copy->hmp->mchain);
                copy->data = NULL;
+               atomic_add_long(&hammer2_chain_allocs, -1);
+               break;
        default:
                break;
        }
index ac6230f..6a0464b 100644 (file)
@@ -597,7 +597,7 @@ struct hammer2_blockref {           /* MUST BE EXACTLY 64 BYTES */
                struct {
                        uint64_t value;
                        uint64_t reserved[7];
-               } crc64;
+               } xxhash64;
                struct {
                        char data[24];
                        char reserved[40];
@@ -664,13 +664,13 @@ typedef struct hammer2_blockref hammer2_blockref_t;
 #define HAMMER2_CHECK_NONE             0
 #define HAMMER2_CHECK_DISABLED         1
 #define HAMMER2_CHECK_ISCSI32          2
-#define HAMMER2_CHECK_CRC64            3
+#define HAMMER2_CHECK_XXHASH64         3
 #define HAMMER2_CHECK_SHA192           4
 #define HAMMER2_CHECK_FREEMAP          5
 
 /* user-specifiable check modes only */
 #define HAMMER2_CHECK_STRINGS          { "none", "disabled", "crc32", \
-                                         "crc64", "sha192" }
+                                         "xxhash64", "sha192" }
 #define HAMMER2_CHECK_STRINGS_COUNT    5
 
 /*
index a6d11e8..7156561 100644 (file)
@@ -801,7 +801,9 @@ again:
                if ((chain->flags & HAMMER2_CHAIN_DESTROY) &&
                    (chain->flags & HAMMER2_CHAIN_DEDUP) == 0 &&
                    chain->dio) {
-                       hammer2_io_setinval(chain->dio, chain->bytes);
+                       hammer2_io_setinval(chain->dio,
+                                           chain->bref.data_off,
+                                           chain->bytes);
                }
        }
 
index 3bccf89..48429ee 100644 (file)
@@ -143,12 +143,28 @@ hammer2_chain_t *
 hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how)
 {
        hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
 
        hammer2_spin_sh(&ip->cluster_spin);
-       if (clindex >= ip->cluster.nchains)
+       cluster = ip->cluster_cache;
+       if (0 && cluster) {
+               if (clindex >= cluster->nchains)
+                       chain = NULL;
+               else
+                       chain = cluster->array[clindex].chain;
+               if (chain) {
+                       hammer2_chain_ref(chain);
+                       hammer2_spin_unsh(&ip->cluster_spin);
+                       hammer2_chain_lock(chain, how);
+                       return chain;
+               }
+       }
+
+       cluster = &ip->cluster;
+       if (clindex >= cluster->nchains)
                chain = NULL;
        else
-               chain = ip->cluster.array[clindex].chain;
+               chain = cluster->array[clindex].chain;
        if (chain) {
                hammer2_chain_ref(chain);
                hammer2_spin_unsh(&ip->cluster_spin);
@@ -314,6 +330,7 @@ hammer2_inode_drop(hammer2_inode_t *ip)
 {
        hammer2_pfs_t *pmp;
        hammer2_inode_t *pip;
+       hammer2_cluster_t *tmpclu;
        u_int refs;
 
        while (ip) {
@@ -349,6 +366,15 @@ hammer2_inode_drop(hammer2_inode_t *ip)
                                ip->pip = NULL;
                                ip->pmp = NULL;
 
+                               /*
+                                * Clean out the cluster cache
+                                */
+                               tmpclu = ip->cluster_cache;
+                               if (tmpclu) {
+                                       ip->cluster_cache = NULL;
+                                       hammer2_cluster_drop(tmpclu);
+                               }
+
                                /*
                                 * Cleaning out ip->cluster isn't entirely
                                 * trivial.
index f0732ba..811f1f0 100644 (file)
@@ -62,6 +62,29 @@ struct hammer2_cleanupcb_info {
        int     count;
 };
 
+static __inline
+uint64_t
+hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
+{
+       uint64_t mask;
+       int i;
+
+       if (bytes < 1024)       /* smaller chunks not supported */
+               return 0;
+
+       /*
+        * Calculate the check mask for larger chunks: one bit per 1KB,
+        * shifted to the chunk's position within the device buffer.
+        */
+       i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
+            HAMMER2_PBUFMASK) >> 10;
+       if (i == 0 && bytes == HAMMER2_PBUFSIZE)
+               return((uint64_t)-1);
+       mask = ((uint64_t)1U << (bytes >> 10)) - 1;
+       mask <<= i;
+
+       return mask;
+}
+
 #define HAMMER2_GETBLK_GOOD    0
 #define HAMMER2_GETBLK_QUEUED  1
 #define HAMMER2_GETBLK_OWNED   2
@@ -146,7 +169,7 @@ hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set GOOD cannot be cleared until refs drops to 0.
                 *
-                * lfence required because dio is not interlockedf for
+                * lfence required because dio's are not interlocked for
                 * the DIO_GOOD test.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
@@ -638,6 +661,73 @@ hammer2_io_data(hammer2_io_t *dio, off_t lbase)
        return(bp->b_data + off);
 }
 
+/*
+ * Keep track of good CRCs in dio->crc_good_mask.  XXX this needs to be
+ * done in the chain structure, but the chain structure would also have
+ * to persist at refs=0 and it doesn't.
+ */
+int
+hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
+{
+       hammer2_io_t *dio;
+       uint64_t mask;
+
+       if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
+               mask = hammer2_io_mask(dio, chain->bref.data_off, chain->bytes);
+               *maskp = mask;
+               if ((dio->crc_good_mask & mask) == mask)
+                       return 1;
+               return 0;
+       }
+       *maskp = 0;
+
+       return 0;
+}
+
+void
+hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
+{
+       if (dio) {
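+               /*
+                * 64-bit platforms can update the mask with a single
+                * atomic op; 32-bit platforms split it into two 32-bit
+                * atomics ordered by host endianness.
+                */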
+               if (sizeof(long) == 8) {
+                       atomic_set_long(&dio->crc_good_mask, mask);
+               } else {
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+                       atomic_set_int(&((int *)&dio->crc_good_mask)[0],
+                                       (uint32_t)mask);
+                       atomic_set_int(&((int *)&dio->crc_good_mask)[1],
+                                       (uint32_t)(mask >> 32));
+#else
+                       atomic_set_int(&((int *)&dio->crc_good_mask)[0],
+                                       (uint32_t)(mask >> 32));
+                       atomic_set_int(&((int *)&dio->crc_good_mask)[1],
+                                       (uint32_t)mask);
+#endif
+               }
+       }
+}
+
+void
+hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
+{
+       if (dio) {
+               if (sizeof(long) == 8) {
+                       atomic_clear_long(&dio->crc_good_mask, mask);
+               } else {
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+                       atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
+                                       (uint32_t)mask);
+                       atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
+                                       (uint32_t)(mask >> 32));
+#else
+                       atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
+                                       (uint32_t)(mask >> 32));
+                       atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
+                                       (uint32_t)mask);
+#endif
+               }
+       }
+}
+
 /*
  * Helpers for hammer2_io_new*() functions
  */
@@ -777,13 +867,15 @@ hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
+               int hce;
+
                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
-               } else if (hammer2_cluster_enable) {
+               } else if ((hce = hammer2_cluster_enable) != 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
@@ -795,7 +887,7 @@ hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
                                             dio->psize,
-                                            dio->psize, HAMMER2_PBUFSIZE*4,
+                                            dio->psize, HAMMER2_PBUFSIZE*hce,
                                             &dio->bp);
                } else {
                        /*
@@ -887,8 +979,11 @@ hammer2_io_setdirty(hammer2_io_t *dio)
 }
 
 void
-hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
+hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
 {
+       uint64_t mask = hammer2_io_mask(dio, off, bytes);
+
+       hammer2_io_crc_clrmask(dio, mask);
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
 }
index 4016d4f..5223c08 100644 (file)
@@ -634,7 +634,7 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
                nip->meta.comp_algo =
                        HAMMER2_ENC_ALGO(HAMMER2_COMP_NEWFS_DEFAULT);
                nip->meta.check_algo =
-                       HAMMER2_ENC_ALGO( HAMMER2_CHECK_ISCSI32);
+                       HAMMER2_ENC_ALGO( HAMMER2_CHECK_XXHASH64);
 
                if (strcasecmp(pfs->name, "boot") == 0) {
                        nip->meta.comp_algo =
index b48630d..e382055 100644 (file)
@@ -90,6 +90,17 @@ static void hammer2_dedup_record(hammer2_chain_t *chain, char *data);
 static hammer2_off_t hammer2_dedup_lookup(hammer2_dev_t *hmp,
                        char **datap, int pblksize);
 
+int h2timer[32];
+int h2last;
+int h2lid;
+
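+/* Checkpoint profiling buckets; see the TIMER() comment in hammer2_chain.c */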
+#define TIMER(which)   do {                            \
+       if (h2last)                                     \
+               h2timer[h2lid] += (int)(ticks - h2last);\
+       h2last = ticks;                                 \
+       h2lid = which;                                  \
+} while(0)
+
 int
 hammer2_vop_strategy(struct vop_strategy_args *ap)
 {
@@ -125,7 +136,9 @@ hammer2_vop_strategy(struct vop_strategy_args *ap)
  * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
  *
  * Basically disabled, the logical buffer write thread has to deal with
- * buffers one-at-a-time.
+ * buffers one-at-a-time.  Note that this should not prevent cluster_read()
+ * from reading ahead; it simply prevents it from trying to form a single
+ * cluster buffer for the logical request.  H2 already uses 64KB buffers!
  */
 int
 hammer2_vop_bmap(struct vop_bmap_args *ap)
@@ -288,6 +301,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
        int cache_index = -1;
        int error;
 
+       TIMER(0);
        lbase = xop->lbase;
        bio = xop->bio;
        bp = bio->bio_buf;
@@ -295,6 +309,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
        parent = hammer2_inode_chain(xop->head.ip1, clindex,
                                     HAMMER2_RESOLVE_ALWAYS |
                                     HAMMER2_RESOLVE_SHARED);
+       TIMER(1);
        if (parent) {
                chain = hammer2_chain_lookup(&parent, &key_dummy,
                                             lbase, lbase,
@@ -306,7 +321,9 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
                error = EIO;
                chain = NULL;
        }
+       TIMER(2);
        error = hammer2_xop_feed(&xop->head, chain, clindex, error);
+       TIMER(3);
        if (chain) {
                hammer2_chain_unlock(chain);
                hammer2_chain_drop(chain);
@@ -317,6 +334,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
        }
        chain = NULL;   /* safety */
        parent = NULL;  /* safety */
+       TIMER(4);
 
        /*
         * Race to finish the frontend
@@ -335,6 +353,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
         * frontend collection non-blocking.
         */
        error = hammer2_xop_collect(&xop->head, HAMMER2_XOP_COLLECT_NOWAIT);
+       TIMER(5);
 
        switch(error) {
        case 0:
@@ -367,6 +386,7 @@ hammer2_strategy_xop_read(hammer2_xop_t *arg, int clindex)
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                break;
        }
+       TIMER(6);
 }
 
 static
@@ -611,6 +631,7 @@ hammer2_assign_physical(hammer2_inode_t *ip, hammer2_chain_t **parentp,
        *errorp = 0;
        KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
 retry:
+       TIMER(30);
        chain = hammer2_chain_lookup(parentp, &key_dummy,
                                     lbase, lbase,
                                     &cache_index,
@@ -670,6 +691,7 @@ retry:
                        break;
                }
        }
+       TIMER(31);
        return (chain);
 }
 
@@ -1223,13 +1245,47 @@ hammer2_dedup_record(hammer2_chain_t *chain, char *data)
 {
        hammer2_dev_t *hmp;
        hammer2_dedup_t *dedup;
-       int32_t crc;
+       uint64_t crc;
        int best = 0;
        int i;
        int dticks;
 
        hmp = chain->hmp;
-       crc = hammer2_icrc32(data, chain->bytes);
+
+       switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
+       case HAMMER2_CHECK_ISCSI32:
+               /*
+                * XXX use the built-in crc (the dedup lookup sequencing
+                * needs to be fixed so the check code is already present
+                * when dedup_lookup is called)
+                */
+#if 0
+               crc = (uint64_t)(uint32_t)chain->bref.check.iscsi32.value;
+#endif
+               crc = XXH64(data, chain->bytes, XXH_HAMMER2_SEED);
+               break;
+       case HAMMER2_CHECK_XXHASH64:
+               crc = chain->bref.check.xxhash64.value;
+               break;
+       case HAMMER2_CHECK_SHA192:
+               /*
+                * XXX use the built-in crc (the dedup lookup sequencing
+                * needs to be fixed so the check code is already present
+                * when dedup_lookup is called)
+                */
+#if 0
+               crc = ((uint64_t *)chain->bref.check.sha192.data)[0] ^
+                     ((uint64_t *)chain->bref.check.sha192.data)[1] ^
+                     ((uint64_t *)chain->bref.check.sha192.data)[2];
+#endif
+               crc = XXH64(data, chain->bytes, XXH_HAMMER2_SEED);
+               break;
+       default:
+               /*
+                * Cannot dedup without a check code
+                */
+               return;
+       }
        dedup = &hmp->heur_dedup[crc & (HAMMER2_DEDUP_HEUR_MASK & ~3)];
        for (i = 0; i < 4; ++i) {
                if (dedup[i].data_crc == crc) {
@@ -1242,7 +1298,7 @@ hammer2_dedup_record(hammer2_chain_t *chain, char *data)
        }
        dedup += best;
        if (hammer2_debug & 0x40000) {
-               kprintf("REC %04x %08x %016jx\n",
+               kprintf("REC %04x %016jx %016jx\n",
                        (int)(dedup - hmp->heur_dedup),
                        crc,
                        chain->bref.data_off);
@@ -1260,7 +1316,7 @@ hammer2_dedup_lookup(hammer2_dev_t *hmp, char **datap, int pblksize)
        hammer2_dedup_t *dedup;
        hammer2_io_t *dio;
        hammer2_off_t off;
-       uint32_t crc;
+       uint64_t crc;
        char *data;
        int i;
 
@@ -1268,11 +1324,16 @@ hammer2_dedup_lookup(hammer2_dev_t *hmp, char **datap, int pblksize)
        if (data == NULL)
                return 0;
 
-       crc = hammer2_icrc32(data, pblksize);
+       /*
+        * XXX use the built-in crc (the dedup lookup sequencing
+        * needs to be fixed so the check code is already present
+        * when dedup_lookup is called)
+        */
+       crc = XXH64(data, pblksize, XXH_HAMMER2_SEED);
        dedup = &hmp->heur_dedup[crc & (HAMMER2_DEDUP_HEUR_MASK & ~3)];
 
        if (hammer2_debug & 0x40000) {
-               kprintf("LOC %04x/4 %08x\n",
+               kprintf("LOC %04x/4 %016jx\n",
                        (int)(dedup - hmp->heur_dedup),
                        crc);
        }
index 718833f..a2af58d 100644 (file)
@@ -48,13 +48,14 @@ hammer2_thr_create(hammer2_thread_t *thr, hammer2_pfs_t *pmp,
 {
        lockinit(&thr->lk, "h2thr", 0, 0);
        thr->pmp = pmp;
-       thr->xopq = &pmp->xopq[clindex];
        thr->clindex = clindex;
        thr->repidx = repidx;
        if (repidx >= 0) {
-               lwkt_create(func, thr, &thr->td, NULL, 0, -1,
+               thr->xopq = &pmp->xopq[clindex][repidx];
+               lwkt_create(func, thr, &thr->td, NULL, 0, repidx % ncpus,
                            "%s-%s.%02d", id, pmp->pfs_names[clindex], repidx);
        } else {
+               thr->xopq = &pmp->xopq[clindex][HAMMER2_XOPGROUPS-repidx];
                lwkt_create(func, thr, &thr->td, NULL, 0, -1,
                            "%s-%s", id, pmp->pfs_names[clindex]);
        }
@@ -287,10 +288,8 @@ hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
        hammer2_thread_t *thr;
 #endif
        hammer2_pfs_t *pmp;
-#if 0
-       int g;
-#endif
        int i;
+       int ng;
        int nchains;
 
        ip1 = xop->ip1;
@@ -298,6 +297,10 @@ hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
        if (pmp->has_xop_threads == 0)
                hammer2_xop_helper_create(pmp);
 
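+       /*
+        * Select the xop group by hashing the inode pointer and the
+        * backend function pointer so a given (inode, operation) pair
+        * always maps to the same thread group.
+        */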
+       /*ng = pmp->xop_iterator++;*/
+       ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1)) ^
+                  hammer2_icrc32(&func, sizeof(func)));
+       ng = ng & HAMMER2_XOPGROUPS_MASK;
 #if 0
        g = pmp->xop_iterator++;
        g = g & HAMMER2_XOPGROUPS_MASK;
@@ -329,7 +332,7 @@ hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
                if (i != notidx && ip1->cluster.array[i].chain) {
                        atomic_set_int(&xop->run_mask, 1U << i);
                        atomic_set_int(&xop->chk_mask, 1U << i);
-                       TAILQ_INSERT_TAIL(&pmp->xopq[i], xop, collect[i].entry);
+                       TAILQ_INSERT_TAIL(&pmp->xopq[i][ng], xop, collect[i].entry);
                }
        }
        hammer2_spin_unex(&pmp->xop_spin);
@@ -340,7 +343,7 @@ hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
         */
        for (i = 0; i < nchains; ++i) {
                if (i != notidx)
-                       wakeup_one(&pmp->xopq[i]);
+                       wakeup(&pmp->xopq[i][ng]);
        }
 }
 
@@ -357,6 +360,7 @@ void
 hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
 {
        hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
        int i;
 
        /*
@@ -373,6 +377,21 @@ hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
                return;
        }
 
+       /*
+        * Cache the terminating cluster.
+        */
+       if ((ip = xop->ip1) != NULL) {
+               hammer2_cluster_t *tmpclu;
+
+               tmpclu = hammer2_cluster_copy(&xop->cluster);
+               hammer2_spin_ex(&ip->cluster_spin);
+               tmpclu = atomic_swap_ptr((volatile void **)&ip->cluster_cache,
+                                        tmpclu);
+               hammer2_spin_unex(&ip->cluster_spin);
+               if (tmpclu)
+                       hammer2_cluster_drop(tmpclu);
+       }
+
        /*
         * All collectors are gone, we can cleanup and dispose of the XOP.
         * Note that this can wind up being a frontend OR a backend.
index 68d0297..0769761 100644 (file)
@@ -78,11 +78,13 @@ static struct hammer2_pfslist hammer2_pfslist;
 static struct lock hammer2_mntlk;
 
 int hammer2_debug;
-int hammer2_cluster_enable = 1;
+int hammer2_cluster_enable = 4;
 int hammer2_hardlink_enable = 1;
 int hammer2_flush_pipe = 100;
 int hammer2_synchronous_flush = 1;
 int hammer2_dio_count;
+long hammer2_chain_allocs;
+long hammer2_chain_frees;
 long hammer2_limit_dirty_chains;
 long hammer2_count_modified_chains;
 long hammer2_iod_file_read;
@@ -129,6 +131,10 @@ SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW,
           &hammer2_flush_pipe, 0, "");
 SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW,
           &hammer2_synchronous_flush, 0, "");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, chain_allocs, CTLFLAG_RW,
+          &hammer2_chain_allocs, 0, "");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, chain_frees, CTLFLAG_RW,
+          &hammer2_chain_frees, 0, "");
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW,
           &hammer2_limit_dirty_chains, 0, "");
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, count_modified_chains, CTLFLAG_RW,
@@ -186,6 +192,13 @@ SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
           &hammer2_ioa_volu_write, 0, "");
 
+long hammer2_check_icrc32;
+long hammer2_check_xxhash64;
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, check_icrc32, CTLFLAG_RW,
+          &hammer2_check_icrc32, 0, "");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, check_xxhash64, CTLFLAG_RW,
+          &hammer2_check_xxhash64, 0, "");
+
 static int hammer2_vfs_init(struct vfsconf *conf);
 static int hammer2_vfs_uninit(struct vfsconf *vfsp);
 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
@@ -377,10 +390,14 @@ hammer2_pfsalloc(hammer2_chain_t *chain, const hammer2_inode_data_t *ripdata,
                /*
                 * Distribute backend operations to threads
                 */
-               for (j = 0; j < HAMMER2_MAXCLUSTER; ++j)
-                       TAILQ_INIT(&pmp->xopq[j]);
-               for (j = 0; j < HAMMER2_XOPGROUPS; ++j)
-                       hammer2_xop_group_init(pmp, &pmp->xop_groups[j]);
+               for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+                       for (j = 0; j < HAMMER2_XOPGROUPS +
+                                       HAMMER2_SPECTHREADS; ++j) {
+                               TAILQ_INIT(&pmp->xopq[i][j]);
+                       }
+               }
+               for (i = 0; i < HAMMER2_XOPGROUPS; ++i)
+                       hammer2_xop_group_init(pmp, &pmp->xop_groups[i]);
 
                /*
                 * Save the last media transaction id for the flusher.  Set
index 6dbde4d..d501aff 100644 (file)
@@ -977,8 +977,11 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                        bawrite(bp);
                } else if (ioflag & IO_ASYNC) {
                        bawrite(bp);
-               } else {
+               } else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) {
                        bdwrite(bp);
+               } else {
+                       bp->b_flags |= B_CLUSTEROK;
+                       cluster_write(bp, new_eof, lblksize, seqcount);
                }
        }
 
diff --git a/sys/vfs/hammer2/hammer2_xxhash.h b/sys/vfs/hammer2/hammer2_xxhash.h
new file mode 100644 (file)
index 0000000..5304bea
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "xxhash/xxhash.h"
+
+#define XXH_HAMMER2_SEED        0x4d617474446c6c6eLLU
diff --git a/sys/vfs/hammer2/xxhash/xxhash.c b/sys/vfs/hammer2/xxhash/xxhash.c
new file mode 100644 (file)
index 0000000..8289f4b
--- /dev/null
@@ -0,0 +1,863 @@
+/*
+*  xxHash - Fast Hash algorithm
+*  Copyright (C) 2012-2016, Yann Collet
+*
+*  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions are
+*  met:
+*
+*  * Redistributions of source code must retain the above copyright
+*  notice, this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above
+*  copyright notice, this list of conditions and the following disclaimer
+*  in the documentation and/or other materials provided with the
+*  distribution.
+*
+*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*  You can contact the author at :
+*  - xxHash homepage: http://www.xxhash.com
+*  - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
+#  define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash; set to 0 when the input data
+ * is guaranteed to be aligned.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define XXH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+#if defined(_KERNEL)
+#include <sys/types.h>
+#include <sys/systm.h>
+#else
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+#endif
+
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+*  Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#   include <stdint.h>
+    typedef uint8_t  BYTE;
+    typedef uint16_t U16;
+    typedef uint32_t U32;
+    typedef  int32_t S32;
+    typedef uint64_t U64;
+#  else
+    typedef unsigned char      BYTE;
+    typedef unsigned short     U16;
+    typedef unsigned int       U32;
+    typedef   signed int       S32;
+    typedef unsigned long long U64;
+#  endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* The packed attribute is safer, but compiler-specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/* *************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    static const int g_one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
+#endif
+
+
+/* ***************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/* *************************************
+*  Constants
+***************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 =  668265263U;
+static const U32 PRIME32_5 =  374761393U;
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 =  1609587929392839161ULL;
+static const U64 PRIME64_4 =  9650029242287828579ULL;
+static const U64 PRIME64_5 =  2870177450012600261ULL;
+
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* ***************************
+*  Simple Hash Functions
+*****************************/
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+    seed += input * PRIME32_2;
+    seed  = XXH_rotl32(seed, 13);
+    seed *= PRIME32_1;
+    return seed;
+}
+
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL) {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16) {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do {
+            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+        } while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    } else {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd) {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd) {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_CREATESTATE_STATIC(state);
+    XXH32_reset(state, seed);
+    XXH32_update(state, input, len);
+    return XXH32_digest(state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 3) == 0) {   /* Input is 4-byte aligned, leverage the speed benefit */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+    acc += input * PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= PRIME64_1;
+    return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+    val  = XXH64_round(0, val);
+    acc ^= val;
+    acc  = acc * PRIME64_1 + PRIME64_4;
+    return acc;
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL) {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32) {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+        } while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd) {
+        U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+        h64 ^= k1;
+        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd) {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd) {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_CREATESTATE_STATIC(state);
+    XXH64_reset(state, seed);
+    XXH64_update(state, input, len);
+    return XXH64_digest(state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+/* **************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+#if !defined(_KERNEL)
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+#endif
+
+/*** Hash feed ***/
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.seed = seed;
+    state.v1 = seed + PRIME32_1 + PRIME32_2;
+    state.v2 = seed + PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME32_1;
+    memcpy(statePtr, &state, sizeof(state));
+    return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.seed = seed;
+    state.v1 = seed + PRIME64_1 + PRIME64_2;
+    state.v2 = seed + PRIME64_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME64_1;
+    memcpy(statePtr, &state, sizeof(state));
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize) {   /* some data left from previous update */
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {   const U32* p32 = state->mem32;
+            state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+            state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+            state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+            state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16) {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do {
+            v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+            v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+            v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+            v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd) {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16) {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    } else {
+        h32 = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd) {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd) {
+        h32 += (*p) * PRIME32_5;
+        h32  = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+
+/* **** XXH64 **** */
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32) {  /* fill in tmp buffer */
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize) {   /* some data left from previous update */
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd) {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do {
+            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd) {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32) {
+        U64 const v1 = state->v1;
+        U64 const v2 = state->v2;
+        U64 const v3 = state->v3;
+        U64 const v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd) {
+        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+        h64 ^= k1;
+        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd) {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd) {
+        h64 ^= (*p) * PRIME64_5;
+        h64  = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/* **************************
+*  Canonical representation
+****************************/
+
+/*! Default XXH result types are basic unsigned 32- and 64-bit integers.
+*   The canonical representation follows the human-readable write convention, aka big-endian (most significant bytes first).
+*   These functions allow transformation of a hash result into and from its canonical format.
+*   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+    return XXH_readBE32(src);
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+    return XXH_readBE64(src);
+}
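
To make the canonical-representation helpers above concrete, here is a short
user-space sketch (illustrative only, not part of this commit; the buffer
contents and seed are arbitrary):

    #include <stdio.h>
    #include "xxhash.h"

    /*
     * Hash a buffer, then serialize the result in canonical (big-endian)
     * form so it compares equal across little- and big-endian systems.
     */
    int main(void)
    {
        const char buf[] = "hello, hammer2";
        XXH64_hash_t h = XXH64(buf, sizeof(buf) - 1, 0);   /* seed = 0 */
        XXH64_canonical_t canon;

        XXH64_canonicalFromHash(&canon, h);
        /* canon.digest[] can now be written to disk; round-trip check: */
        if (XXH64_hashFromCanonical(&canon) != h)
            return 1;
        printf("xxh64 = %016llx\n", (unsigned long long)h);
        return 0;
    }
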
diff --git a/sys/vfs/hammer2/xxhash/xxhash.h b/sys/vfs/hammer2/xxhash/xxhash.h
new file mode 100644 (file)
index 0000000..9e11a8f
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, 32-bit Windows 7, using SMHasher on a Core 2 Duo @ 3 GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of the quality of the hash function.
+It depends on successfully passing the SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+*  Definitions
+******************************/
+#if !defined(_KERNEL)
+#include <stddef.h>   /* size_t */
+#endif
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+*  API modifier
+******************************/
+/*!XXH_PRIVATE_API
+*  Transforms all public symbols within `xxhash.c` into private ones.
+*  Methodology :
+*  instead of : #include "xxhash.h"
+*  do :
+*     #define XXH_PRIVATE_API
+*     #include "xxhash.c"   // note the .c , instead of .h
+*  also : don't compile and link xxhash.c separately
+*/
+#ifdef XXH_PRIVATE_API
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with another library which also includes xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c`
+with the value of XXH_NAMESPACE (so avoid leaving it empty, and avoid numeric values).
+
+Note that no change is required within the calling program as long as it also includes `xxhash.h` :
+regular symbol names are automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
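+
+/* Illustrative note (editorial, not upstream xxHash code): with a
+ * hypothetical build flag such as -DXXH_NAMESPACE=mylib_, the macros above
+ * expand a call like
+ *
+ *     XXH64_hash_t h = XXH64(buf, len, 0);
+ *
+ * into a call to mylib_XXH64(), letting two libraries each embed xxHash
+ * without their symbols colliding at link time.
+ */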
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    6
+#define XXH_VERSION_RELEASE  0
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
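+/* Worked example (editorial note): for the 0.6.0 release defined above,
+ * XXH_VERSION_NUMBER evaluates to 0*100*100 + 6*100 + 0 = 600.
+ */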
+
+
+/* ****************************
+*  Simple Hash Functions
+******************************/
+typedef unsigned int       XXH32_hash_t;
+typedef unsigned long long XXH64_hash_t;
+
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*!
+XXH32() :
+    Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bit hash of a sequence of "length" bytes stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
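+
+/* Illustrative sketch (editorial, not upstream xxHash code): one-shot hashing
+ * of a small buffer; the buffer contents and seed values are arbitrary.
+ *
+ *     char msg[] = "abc";
+ *     XXH32_hash_t h32 = XXH32(msg, 3, 0);          // 32-bit hash, seed 0
+ *     XXH64_hash_t h64 = XXH64(msg, 3, 12345ULL);   // 64-bit hash, seeded
+ *
+ * The same input hashed with a different seed yields a different value.
+ */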
+
+
+/* ****************************
+*  Streaming Hash Functions
+******************************/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+
+/*! Dynamic allocation of states
+    Compatible with dynamic libraries */
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+
+/* hash streaming */
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+
+/*!
+These functions generate the xxHash of an input provided in multiple segments,
+as opposed to a single block.
+
+An XXH state must first be allocated, using either a static or a dynamic method.
+
+Start a new hash by initializing the state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, the input must be valid, hence allocated and read-accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, a hash value can be produced at any time, by using XXHnn_digest().
+This function returns the nn-bit hash as an int or long long.
+
+It's still possible to continue inserting input into the hash state after a digest,
+and later generate new hash values, by calling XXHnn_digest() again.
+
+When done, free the XXH state space if it was allocated dynamically.
+*/
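+
+/* Illustrative sketch (editorial, not upstream xxHash code) of the streaming
+ * flow described above; error checking is abbreviated and the segment names
+ * are arbitrary:
+ *
+ *     XXH64_state_t* st = XXH64_createState();
+ *     XXH64_reset(st, 0);                    // start a new hash, seed 0
+ *     XXH64_update(st, seg1, seg1_len);      // feed segments in order
+ *     XXH64_update(st, seg2, seg2_len);
+ *     XXH64_hash_t h = XXH64_digest(st);     // same value XXH64() would give
+ *     XXH64_freeState(st);
+ */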
+
+
+/* **************************
+*  Canonical representation
+****************************/
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+
+/*! Default result types for XXH functions are basic unsigned 32- and 64-bit integers.
+*   The canonical representation uses the human-readable write convention, aka big-endian (most significant bytes first).
+*   These functions allow transformation of a hash result into and from its canonical format.
+*   This way, hash values can be written into a file or memory, and remain comparable across different systems and programs.
+*/
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* This part contains definitions which shall only be used with static linking.
+   The prototypes / types defined here are not guaranteed to remain stable.
+   They could change in a future version, becoming incompatible with a different version of the library. */
+
+   struct XXH32_state_s {
+       unsigned long long total_len;
+       unsigned seed;
+       unsigned v1;
+       unsigned v2;
+       unsigned v3;
+       unsigned v4;
+       unsigned mem32[4];   /* buffer defined as U32 for alignment */
+       unsigned memsize;
+   };   /* typedef'd to XXH32_state_t */
+
+   struct XXH64_state_s {
+       unsigned long long total_len;
+       unsigned long long seed;
+       unsigned long long v1;
+       unsigned long long v2;
+       unsigned long long v3;
+       unsigned long long v4;
+       unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
+       unsigned memsize;
+   };   /* typedef'd to XXH64_state_t */
+
+
+#endif
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
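
As a closing usage note for the XXH_STATIC_LINKING_ONLY section (an editorial
sketch, not part of this commit): exposing the state structures lets a caller
keep the state on the stack instead of going through XXHnn_createState() and
XXHnn_freeState(), at the cost of depending on a struct layout the header
warns may change between library versions. The function name below is
hypothetical:

    #define XXH_STATIC_LINKING_ONLY   /* expose struct XXH64_state_s */
    #include "xxhash.h"

    unsigned long long
    hash_two_chunks(const void *a, size_t alen, const void *b, size_t blen)
    {
        XXH64_state_t st;          /* stack allocation, no malloc needed */

        XXH64_reset(&st, 0);
        XXH64_update(&st, a, alen);
        XXH64_update(&st, b, blen);
        return XXH64_digest(&st);
    }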