hammer2 - Implement and test first SPAN message transaction.
author Matthew Dillon <dillon@apollo.backplane.com>
Wed, 13 Jun 2012 05:46:13 +0000 (22:46 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Wed, 13 Jun 2012 05:46:13 +0000 (22:46 -0700)
* The hammer2 VFS now sends a dummy SPAN message to the hammer2 service
  daemon.  SPANs are used to register capabilities (primarily PFS services
  and PFS consumers).  SPAN messages are left as open transactions for the
  duration of the link and/or when the graph changes (mainly a spanning
  tree mechanic that will be coded as a function of the hammer2 service
  daemon in userland).

* Basic open transaction and simple reply message tested.  Use a dummy
  message for testing.

* hammer2_msg_write() detects CREATE, allocates state, and assigns a
  msgid.  State allocation was moved out of hammer2_state_msgtx() and
  into hammer2_msg_write() so we can calculate the proper CRCs.

* Fixed a couple of expected bugs.  The userland code was swapping
  msg_hdr.source and msg_hdr.target in the reply, but I adjusted the
  message spec to NOT do that (meaning any message routing has to select
  {source} or {target} based on whether the REPLY bit is set or not).

* Memory seems to get cleaned up properly, so far.

13 files changed:
sbin/hammer2/cmd_service.c
sbin/hammer2/crypto.c
sbin/hammer2/hammer2.h
sbin/hammer2/main.c
sbin/hammer2/msg.c
sys/vfs/hammer2/DESIGN
sys/vfs/hammer2/Makefile
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_disk.h
sys/vfs/hammer2/hammer2_msg.c
sys/vfs/hammer2/hammer2_msgops.c [new file with mode: 0644]
sys/vfs/hammer2/hammer2_network.h
sys/vfs/hammer2/hammer2_vfsops.c

index 5b0ebc7..d6f9e2f 100644 (file)
@@ -88,10 +88,14 @@ cmd_service(void)
        lsin.sin_port = htons(HAMMER2_LISTEN_PORT);
        if (bind(lfd, (struct sockaddr *)&lsin, sizeof(lsin)) < 0) {
                close(lfd);
-               fprintf(stderr, "master listen: daemon already running\n");
+               if (QuietOpt == 0) {
+                       fprintf(stderr,
+                               "master listen: daemon already running\n");
+               }
                return 0;
        }
-       fprintf(stderr, "master listen: startup\n");
+       if (QuietOpt == 0)
+               fprintf(stderr, "master listen: startup\n");
        listen(lfd, 50);
 
        /*
index 480568c..248764d 100644 (file)
@@ -131,6 +131,9 @@ hammer2_crypto_negotiate(hammer2_iocom_t *iocom)
 
        /*
         * Find the remote host's public key
+        *
+        * If the link is not to be encrypted (<ip>.none located) we shortcut
+        * the handshake entirely.  No buffers are exchanged.
         */
        asprintf(&path, "%s/%s.pub", HAMMER2_PATH_REMOTE, peername);
        if ((fp = fopen(path, "r")) == NULL) {
@@ -146,6 +149,7 @@ hammer2_crypto_negotiate(hammer2_iocom_t *iocom)
                }
                if (DebugOpt)
                        fprintf(stderr, "auth succeeded, unencrypted link\n");
+               goto done;
        }
        if (fp) {
                keys[0] = PEM_read_RSA_PUBKEY(fp, NULL, NULL, NULL);
index 6eddcd0..ed71e6c 100644 (file)
@@ -81,6 +81,7 @@
 
 extern int DebugOpt;
 extern int VerboseOpt;
+extern int QuietOpt;
 extern int NormalExit;
 
 int hammer2_ioctl_handle(const char *sel_path);
index a6aa429..75caad5 100644 (file)
@@ -39,6 +39,7 @@ static void usage(int code);
 
 int DebugOpt;
 int VerboseOpt;
+int QuietOpt;
 int NormalExit = 1;    /* if set to 0 main() has to pthread_exit() */
 
 int
@@ -48,7 +49,6 @@ main(int ac, char **av)
        const char *uuid_str = NULL;
        const char *arg;
        int pfs_type = HAMMER2_PFSTYPE_NONE;
-       int quick_opt = 0;
        int all_opt = 0;
        int ecode = 0;
        int ch;
@@ -66,13 +66,6 @@ main(int ac, char **av)
                case 'd':
                        DebugOpt = 1;
                        break;
-               case 'q':
-                       /*
-                        * Quick mode - do not block verifying certain
-                        * operations such as (connect).
-                        */
-                       quick_opt = 1;
-                       break;
                case 's':
                        sel_path = optarg;
                        break;
@@ -107,7 +100,16 @@ main(int ac, char **av)
                        uuid_str = optarg;
                        break;
                case 'v':
-                       ++VerboseOpt;
+                       if (QuietOpt)
+                               --QuietOpt;
+                       else
+                               ++VerboseOpt;
+                       break;
+               case 'q':
+                       if (VerboseOpt)
+                               --VerboseOpt;
+                       else
+                               ++QuietOpt;
                        break;
                default:
                        fprintf(stderr, "Unknown option: %c\n", ch);
index 4fc4439..f17855a 100644 (file)
@@ -178,8 +178,6 @@ hammer2_allocreply(hammer2_msg_t *msg, uint32_t cmd, int aux_size)
 
        rmsg = hammer2_allocmsg(msg->iocom, cmd, aux_size);
        rmsg->any.head = msg->any.head;
-       rmsg->any.head.source = msg->any.head.target;
-       rmsg->any.head.target = msg->any.head.source;
        rmsg->any.head.cmd = (cmd | HAMMER2_MSGF_REPLY) &
                             ~(HAMMER2_MSGF_CREATE | HAMMER2_MSGF_DELETE);
        rmsg->any.head.aux_icrc = 0;
@@ -917,13 +915,9 @@ void
 hammer2_replymsg(hammer2_msg_t *msg, uint16_t error)
 {
        hammer2_persist_t *pers;
-       uint16_t t16;
 
        assert((msg->any.head.cmd & HAMMER2_MSGF_REPLY) == 0);
 
-       t16 = msg->any.head.source;
-       msg->any.head.source = msg->any.head.target;
-       msg->any.head.target = t16;
        msg->any.head.error = error;
        msg->any.head.cmd |= HAMMER2_MSGF_REPLY;
        msg->aux_size = 0;
index fe10627..6cb1cc4 100644 (file)
@@ -300,6 +300,7 @@ caches, use local media for persistent cache, or use local media to
 completely slave the filesystem.
 
     ADMIN      - Media does not participate, administrative proxy only
+    CLIENT     - Media does not participate, client only
     CACHE      - Media only acts as a persistent cache
     COPY       - Media only acts as a local copy
     SLAVE      - Media is a RO slave that can be mounted RW
index 5dfde44..294aae5 100644 (file)
@@ -7,6 +7,6 @@ CFLAGS+= -DINVARIANTS -DSMP
 KMOD=  hammer2
 SRCS=  hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c hammer2_ccms.c
 SRCS+= hammer2_chain.c hammer2_freemap.c hammer2_subr.c hammer2_icrc.c
-SRCS+= hammer2_ioctl.c hammer2_msg.c
+SRCS+= hammer2_ioctl.c hammer2_msg.c hammer2_msgops.c
 
 .include <bsd.kmod.mk>
index 8faa6c6..0b3b4da 100644 (file)
@@ -320,6 +320,7 @@ struct hammer2_pfsmount {
        thread_t                msgrd_td;       /* cluster thread */
        thread_t                msgwr_td;       /* cluster thread */
        int                     msg_ctl;        /* wakeup flags */
+       uint32_t                msgid_iterator;
        struct lock             msglk;          /* lockmgr lock */
        TAILQ_HEAD(, hammer2_msg) msgq;         /* transmit queue */
        struct hammer2_state    *freerd_state;  /* allocation cache */
@@ -336,18 +337,26 @@ typedef struct hammer2_pfsmount hammer2_pfsmount_t;
  * In-memory message structure for hammer2.
  *
  * Persistent cache state messages will be associated with a hammer2_chain.
+ *
+ * NOTE!  If REPLY is set then the source and target fields in the message
+ *       are NOT swapped.  That is, source and target remain unchanged whether
+ *       the message is a command from side A or a reply from side B.
+ *       The message is routed based on target if REPLY is not set, and on
+ *       source if REPLY is set.
  */
 struct hammer2_state {
        RB_ENTRY(hammer2_state) rbnode;         /* indexed by msgid */
        struct hammer2_pfsmount *pmp;
        uint32_t        txcmd;                  /* mostly for CMDF flags */
        uint32_t        rxcmd;                  /* mostly for CMDF flags */
-       uint32_t        msgid;
+       uint16_t        source;                 /* command originator */
+       uint16_t        target;                 /* reply originator */
+       uint32_t        msgid;                  /* {source,target,msgid} uniq */
        int             flags;
        int             error;
        struct hammer2_chain *chain;            /* msg associated w/chain */
        struct hammer2_msg *msg;
-       void (*func)(struct hammer2_state *state, struct hammer2_msg *msg);
+       int (*func)(struct hammer2_pfsmount *, struct hammer2_msg *);
 };
 
 #define HAMMER2_STATE_INSERTED 0x0001
@@ -533,6 +542,18 @@ void hammer2_state_cleanuptx(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg);
 int hammer2_msg_execute(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg);
 void hammer2_state_free(hammer2_state_t *state);
 void hammer2_msg_free(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg);
+hammer2_msg_t *hammer2_msg_alloc(hammer2_pfsmount_t *pmp,
+                               uint16_t source, uint16_t target,
+                               uint32_t cmd);
+hammer2_state_t *hammer2_msg_write(hammer2_pfsmount_t *pmp,
+                               hammer2_msg_t *msg,
+                               int (*func)(hammer2_pfsmount_t *,
+                                           hammer2_msg_t *));
+
+/*
+ * hammer2_msgops.c
+ */
+int hammer2_msg_adhoc_input(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg);
 
 /*
  * hammer2_freemap.c
index ccc8dca..3309799 100644 (file)
@@ -509,12 +509,13 @@ typedef struct hammer2_inode_data hammer2_inode_data_t;
 
 #define HAMMER2_PFSTYPE_NONE           0
 #define HAMMER2_PFSTYPE_ADMIN          1
-#define HAMMER2_PFSTYPE_CACHE          2
-#define HAMMER2_PFSTYPE_COPY           3
-#define HAMMER2_PFSTYPE_SLAVE          4
-#define HAMMER2_PFSTYPE_SOFT_SLAVE     5
-#define HAMMER2_PFSTYPE_SOFT_MASTER    6
-#define HAMMER2_PFSTYPE_MASTER         7
+#define HAMMER2_PFSTYPE_CLIENT         2
+#define HAMMER2_PFSTYPE_CACHE          3
+#define HAMMER2_PFSTYPE_COPY           4
+#define HAMMER2_PFSTYPE_SLAVE          5
+#define HAMMER2_PFSTYPE_SOFT_SLAVE     6
+#define HAMMER2_PFSTYPE_SOFT_MASTER    7
+#define HAMMER2_PFSTYPE_MASTER         8
 
 /*
  * The allocref structure represents the allocation table.  One 64K block
@@ -701,7 +702,7 @@ struct hammer2_volume_data {
        uint32_t        flags;                  /* 0034 */
        uint8_t         copyid;                 /* 0038 copyid of phys vol */
        uint8_t         freemap_version;        /* 0039 freemap algorithm */
-       uint8_t         pfstype;                /* 003A local media pfstype */
+       uint8_t         pfs_type;               /* 003A local media pfstype */
        uint8_t         reserved003B;           /* 003B */
        uint32_t        reserved003C;           /* 003C */
 
index 6ae0186..33edf39 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "hammer2.h"
 
+RB_GENERATE(hammer2_state_tree, hammer2_state, rbnode, hammer2_state_cmp);
+
 /*
  * Process state tracking for a message after reception, prior to
  * execution.
@@ -128,6 +130,11 @@ hammer2_state_msgrx(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
        lockmgr(&pmp->msglk, LK_EXCLUSIVE);
 
        state->msgid = msg->any.head.msgid;
+       state->source = msg->any.head.source;
+       state->target = msg->any.head.target;
+       kprintf("received msg %08x msgid %u source=%u target=%u\n",
+               msg->any.head.cmd, msg->any.head.msgid, msg->any.head.source,
+               msg->any.head.target);
        if (msg->any.head.cmd & HAMMER2_MSGF_REPLY)
                state = RB_FIND(hammer2_state_tree, &pmp->statewr_tree, state);
        else
@@ -222,7 +229,7 @@ hammer2_state_msgrx(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
                 */
                if (state == NULL) {
                        kprintf("hammer2_state_msgrx: no state match for "
-                               "REPLY\n");
+                               "REPLY cmd=%08x\n", msg->any.head.cmd);
                        error = EINVAL;
                        break;
                }
@@ -374,12 +381,21 @@ hammer2_state_msgtx(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
                 * half-closed if DELETE is set.  Since this is a new
                 * message it isn't possible to transition into the fully
                 * closed state here.
+                *
+                * XXX state must be assigned and inserted by
+                *     hammer2_msg_write().  txcmd is assigned by us
+                *     on-transmit.
                 */
+               KKASSERT(state != NULL);
+#if 0
                if (state == NULL) {
                        state = pmp->freerd_state;
                        pmp->freerd_state = NULL;
                        msg->state = state;
                        state->msg = msg;
+                       state->msgid = msg->any.head.msgid;
+                       state->source = msg->any.head.source;
+                       state->target = msg->any.head.target;
                }
                KKASSERT((state->flags & HAMMER2_STATE_INSERTED) == 0);
                if (RB_INSERT(hammer2_state_tree, &pmp->staterd_tree, state)) {
@@ -388,6 +404,7 @@ hammer2_state_msgtx(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
                        break;
                }
                state->flags |= HAMMER2_STATE_INSERTED;
+#endif
                state->txcmd = msg->any.head.cmd & ~HAMMER2_MSGF_DELETE;
                error = 0;
                break;
@@ -556,6 +573,25 @@ hammer2_state_free(hammer2_state_t *state)
                hammer2_msg_free(pmp, msg);
 }
 
+hammer2_msg_t *
+hammer2_msg_alloc(hammer2_pfsmount_t *pmp, uint16_t source, uint16_t target,
+                 uint32_t cmd)
+{
+       hammer2_msg_t *msg;
+       size_t hbytes;
+
+       hbytes = (cmd & HAMMER2_MSGF_SIZE) * HAMMER2_MSG_ALIGN;
+       msg = kmalloc(offsetof(struct hammer2_msg, any) + hbytes,
+                     pmp->mmsg, M_WAITOK | M_ZERO);
+       msg->hdr_size = hbytes;
+       msg->any.head.magic = HAMMER2_MSGHDR_MAGIC;
+       msg->any.head.source = source;
+       msg->any.head.target = target;
+       msg->any.head.cmd = cmd;
+
+       return (msg);
+}
+
 void
 hammer2_msg_free(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
 {
@@ -574,6 +610,14 @@ hammer2_msg_free(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
 int
 hammer2_state_cmp(hammer2_state_t *state1, hammer2_state_t *state2)
 {
+       if (state1->source < state2->source)
+               return(-1);
+       if (state1->source > state2->source)
+               return(1);
+       if (state1->target < state2->target)
+               return(-1);
+       if (state1->target > state2->target)
+               return(1);
        if (state1->msgid < state2->msgid)
                return(-1);
        if (state1->msgid > state2->msgid)
@@ -582,23 +626,76 @@ hammer2_state_cmp(hammer2_state_t *state1, hammer2_state_t *state2)
 }
 
 /*
- * Execute a received message.
+ * Write a message.  {source, target, cmd} have been set.
  *
- * If msg is part of a transaction msg->state will be non-NULL.  In this
- * situation state->msg saves the original command and can be replaced
- * by ongoing activity if desired.  Any msg which does not match state->msg
- * will be destroyed after we return.
- *
- * Single-element transactions set both CREATE and DELETE.
- *
- * Non-blocking transactions also set ABORT with CREATE.
- *
- * Specific transactional ABORTs can also be received and must be handled,
- * and can be received even when the other end has closed (after DELETE
- * received) if we have not yet closed our end of the transaction.
+ * If CREATE is set we allocate the state and msgid and do the insertion.
+ * If CREATE is not set the state and msgid must already be assigned.
  */
-int
-hammer2_msg_execute(hammer2_pfsmount_t *pmp __unused, hammer2_msg_t *msg __unused)
+hammer2_state_t *
+hammer2_msg_write(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg,
+                 int (*func)(hammer2_pfsmount_t *, hammer2_msg_t *))
 {
-       return(0);
+       hammer2_state_t *state;
+       uint16_t xcrc16;
+       uint32_t xcrc32;
+
+       /*
+        * Setup transaction (if applicable).  One-off messages always
+        * use a msgid of 0.
+        */
+       if (msg->any.head.cmd & HAMMER2_MSGF_CREATE) {
+               /*
+                * New transaction, requires tracking state and a unique
+                * msgid.
+                */
+               KKASSERT(msg->state == NULL);
+               state = kmalloc(sizeof(*state), pmp->mmsg, M_WAITOK | M_ZERO);
+               state->pmp = pmp;
+               state->flags = HAMMER2_STATE_DYNAMIC;
+               state->func = func;
+               state->msg = msg;
+               state->source = msg->any.head.source;
+               state->target = msg->any.head.target;
+               msg->state = state;
+
+               lockmgr(&pmp->msglk, LK_EXCLUSIVE);
+               if ((state->msgid = pmp->msgid_iterator++) == 0)
+                       state->msgid = pmp->msgid_iterator++;
+               while (RB_INSERT(hammer2_state_tree,
+                                &pmp->statewr_tree, state)) {
+                       if ((state->msgid = pmp->msgid_iterator++) == 0)
+                               state->msgid = pmp->msgid_iterator++;
+               }
+               msg->any.head.msgid = state->msgid;
+       } else if (msg->state) {
+               /*
+                * Continuance or termination
+                */
+               lockmgr(&pmp->msglk, LK_EXCLUSIVE);
+       } else {
+               /*
+                * One-off message (always uses msgid 0)
+                */
+               msg->any.head.msgid = 0;
+               lockmgr(&pmp->msglk, LK_EXCLUSIVE);
+       }
+
+       /*
+        * Set icrc2 and icrc1
+        */
+       if (msg->hdr_size > sizeof(msg->any.head)) {
+               xcrc32 = hammer2_icrc32(&msg->any.head + 1,
+                                       msg->hdr_size - sizeof(msg->any.head));
+               xcrc16 = (uint16_t)xcrc32 ^ (uint16_t)(xcrc32 >> 16);
+               msg->any.head.icrc2 = xcrc16;
+       }
+       xcrc32 = hammer2_icrc32(msg->any.buf + HAMMER2_MSGHDR_CRCOFF,
+                               HAMMER2_MSGHDR_CRCBYTES);
+       xcrc16 = (uint16_t)xcrc32 ^ (uint16_t)(xcrc32 >> 16);
+       msg->any.head.icrc1 = xcrc16;
+
+       TAILQ_INSERT_TAIL(&pmp->msgq, msg, qentry);
+       lockmgr(&pmp->msglk, LK_RELEASE);
+
+       return (msg->state);
 }
diff --git a/sys/vfs/hammer2/hammer2_msgops.c b/sys/vfs/hammer2/hammer2_msgops.c
new file mode 100644 (file)
index 0000000..89f8b12
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/mountctl.h>
+#include <sys/dirent.h>
+#include <sys/uio.h>
+
+#include "hammer2.h"
+
+int
+hammer2_msg_adhoc_input(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
+{
+       kprintf("ADHOC INPUT MSG %08x\n", msg->any.head.cmd);
+       return(0);
+}
index c5a597c..4a79da6 100644 (file)
@@ -172,9 +172,9 @@ struct hammer2_msg_hdr {
        uint16_t        icrc1;          /* base header crc &salt on */
        uint32_t        salt;           /* random salt helps crypto/replay */
 
-       uint16_t        source;         /* source linkid */
-       uint16_t        target;         /* target linkid */
-       uint32_t        msgid;          /* message id */
+       uint16_t        source;         /* command originator linkid */
+       uint16_t        target;         /* reply originator linkid */
+       uint32_t        msgid;          /* {source,target,msgid} unique */
 
        uint32_t        cmd;            /* flags | cmd | hdr_size / 32 */
        uint16_t        error;          /* error field */
@@ -228,10 +228,11 @@ typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
 
 #define HAMMER2_MSG_PROTO_LNK          0x00000000U
 #define HAMMER2_MSG_PROTO_DBG          0x00100000U
-#define HAMMER2_MSG_PROTO_CAC          0x00200000U
-#define HAMMER2_MSG_PROTO_QRM          0x00300000U
-#define HAMMER2_MSG_PROTO_BLK          0x00400000U
-#define HAMMER2_MSG_PROTO_VOP          0x00500000U
+#define HAMMER2_MSG_PROTO_DOM          0x00200000U
+#define HAMMER2_MSG_PROTO_CAC          0x00300000U
+#define HAMMER2_MSG_PROTO_QRM          0x00400000U
+#define HAMMER2_MSG_PROTO_BLK          0x00500000U
+#define HAMMER2_MSG_PROTO_VOP          0x00600000U
 
 /*
  * Message command constructors, sans flags
@@ -252,6 +253,10 @@ typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
                                         ((cmd) << 8) |                 \
                                         HAMMER2_MSG_HDR_ENCODE(elm))
 
+#define HAMMER2_MSG_DOM(cmd, elm)      (HAMMER2_MSG_PROTO_DOM |        \
+                                        ((cmd) << 8) |                 \
+                                        HAMMER2_MSG_HDR_ENCODE(elm))
+
 #define HAMMER2_MSG_CAC(cmd, elm)      (HAMMER2_MSG_PROTO_CAC |        \
                                         ((cmd) << 8) |                 \
                                         HAMMER2_MSG_HDR_ENCODE(elm))
@@ -276,7 +281,7 @@ typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
  *               pad message buffers on shared-memory transports.  Not
  *               typically used with TCP.
  *
- * AUTHn       - Authenticate the connection, negotiate administrative
+ * AUTH                - Authenticate the connection, negotiate administrative
  *               rights & encryption, protocol class, etc.  Only PAD and
  *               AUTH messages (not even PING) are accepted until
  *               authentication is complete.  This message also identifies
@@ -286,7 +291,7 @@ typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
  *               typically 1/sec on idle link, link is lost after 10 seconds
  *               of inactivity.
  *
- * HSPAN       - One-way message on link-0, host-spanning tree message.
+ * STATUS      - One-way message on link-0, host-spanning tree message.
  *               Connection and authentication status is propagated using
  *               these messages on a per-connection basis.  Works like SPAN
  *               but is only used for general status.  See the hammer2
@@ -309,11 +314,65 @@ typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
 #define HAMMER2_LNK_PAD                HAMMER2_MSG_LNK(0x000, hammer2_msg_hdr)
 #define HAMMER2_LNK_PING       HAMMER2_MSG_LNK(0x001, hammer2_msg_hdr)
 #define HAMMER2_LNK_AUTH       HAMMER2_MSG_LNK(0x010, hammer2_lnk_auth)
-#define HAMMER2_LNK_HSPAN      HAMMER2_MSG_LNK(0x011, hammer2_lnk_hspan)
-#define HAMMER2_LNK_SPAN       HAMMER2_MSG_LNK(0x012, hammer2_lnk_span)
+#define HAMMER2_LNK_SPAN       HAMMER2_MSG_LNK(0x011, hammer2_lnk_span)
 #define HAMMER2_LNK_ERROR      HAMMER2_MSG_LNK(0xFFF, hammer2_msg_hdr)
 
 /*
+ * SPAN - Registration (transaction, left open)
+ *
+ * This message registers a PFS/PFS_TYPE with the other end of the connection,
+ * telling the other end who we are and what we can provide or what we want
+ * to consume.  Multiple registrations can be maintained as open transactions
+ * with each one specifying a unique {source} linkid.
+ *
+ * Registrations are sent from {source}=S {1...n} to {target}=0 and maintained
+ * as open transactions.  Registrations are also received and maintained as
+ * open transactions, creating a matrix of linkid's.
+ *
+ * While these transactions are open additional transactions can be executed
+ * between any two linkid's {source}=S (registrations we sent) to {target}=T
+ * (registrations we received).
+ *
+ * Closure of any registration transaction will automatically abort any open
+ * transactions using the related linkids.  Closure can be initiated
+ * voluntarily from either side with either end issuing a DELETE, or they
+ * can be ABORTed.
+ *
+ * Status updates are performed via the open transaction.
+ *
+ * --
+ *
+ * A registration identifies a node and its various PFS parameters including
+ * the PFS_TYPE.  For example, a diskless HAMMER2 client typically identifies
+ * itself as PFSTYPE_CLIENT.
+ *
+ * Any node may serve as a cluster controller, aggregating and passing
+ * on received registrations, but end-points do not have to implement this
+ * ability.  Most end-points typically implement a single client-style or
+ * server-style PFS_TYPE and rendezvous at a cluster controller.
+ *
+ * The cluster controller does not aggregate/pass-on all received
+ * registrations.  It typically filters what gets passed on based on
+ * what it receives.
+ *
+ * STATUS UPDATES: Status updates use the same structure but typically
+ *                only contain incremental changes to pfs_type, with the
+ *                label field containing a text status.
+ */
+struct hammer2_lnk_span {
+       hammer2_msg_hdr_t head;
+       uuid_t          pfs_id;         /* rendezvous pfs uuid */
+       uuid_t          pfs_fsid;       /* unique pfs uuid */
+       uint8_t         pfs_type;       /* peer type */
+       uint8_t         reserved01;
+       uint16_t        proto_version;  /* high level protocol support */
+       uint32_t        status;         /* status flags */
+       uint8_t         reserved02[8];
+       uint32_t        reserved03[16];
+       char            label[256];     /* PFS label (can be wildcard) */
+};
+
+/*
  * Debug layer ops operate on any link
  *
  * SHELL       - Persist stream, access the debug shell on the target
@@ -327,6 +386,13 @@ struct hammer2_dbg_shell {
 typedef struct hammer2_dbg_shell hammer2_dbg_shell_t;
 
 /*
+ * Domain layer ops operate on any link, link-0 may be used when the
+ * directly connected target is the desired registration.
+ *
+ * (nothing defined)
+ */
+
+/*
  * Cache layer ops operate on any link, link-0 may be used when the
  * directly connected target is the desired registration.
  *
index 140aa68..ce605c0 100644 (file)
@@ -139,6 +139,7 @@ static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
 
 static void hammer2_cluster_thread_rd(void *arg);
 static void hammer2_cluster_thread_wr(void *arg);
+static int hammer2_msg_span_reply(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg);
 
 /*
  * HAMMER2 vfs operations.
@@ -1084,8 +1085,11 @@ hammer2_cluster_thread_rd(void *arg)
                        hammer2_msg_free(pmp, msg);
                        if (error == EALREADY)
                                error = 0;
+               } else if (msg->state) {
+                       error = msg->state->func(pmp, msg);
+                       hammer2_state_cleanuprx(pmp, msg);
                } else {
-                       error = hammer2_msg_execute(pmp, msg);
+                       error = hammer2_msg_adhoc_input(pmp, msg);
                        hammer2_state_cleanuprx(pmp, msg);
                }
                msg = NULL;
@@ -1129,7 +1133,20 @@ hammer2_cluster_thread_wr(void *arg)
        ssize_t res;
        int error = 0;
 
+       /*
+        * Initiate a SPAN transaction registering our PFS with the other
+        * end using {source}=1.  The transaction is left open.
+        */
+       msg = hammer2_msg_alloc(pmp, 1, 0,
+                               HAMMER2_LNK_SPAN | HAMMER2_MSGF_CREATE);
+       hammer2_msg_write(pmp, msg, hammer2_msg_span_reply);
+
+       /*
+        * Transmit loop
+        */
+       msg = NULL;
        lockmgr(&pmp->msglk, LK_EXCLUSIVE);
+
        while ((pmp->msg_ctl & HAMMER2_CLUSTERCTL_KILL) == 0 && error == 0) {
                lksleep(&pmp->msg_ctl, &pmp->msglk, 0, "msgwr", hz);
                while ((msg = TAILQ_FIRST(&pmp->msgq)) != NULL) {
@@ -1218,3 +1235,10 @@ hammer2_cluster_thread_wr(void *arg)
        wakeup(pmp);
        lwkt_exit();
 }
+
+static int
+hammer2_msg_span_reply(hammer2_pfsmount_t *pmp, hammer2_msg_t *msg)
+{
+       kprintf("SPAN REPLY\n");
+       return(0);
+}