hammer2 - Add server-side disk advertisements
authorMatthew Dillon <dillon@apollo.backplane.com>
Thu, 25 Oct 2012 23:00:47 +0000 (16:00 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Thu, 25 Oct 2012 23:00:47 +0000 (16:00 -0700)
* The hammer2 service daemon now tracks disks and connects the cluster
  controller to each one.

* Add a new ioctl to the subr_disk subsystem and add subr_diskiocom.c
  to implement it, associating a cluster controller messaging descriptor
  with a disk device.

* Each disk device now initiates LNK_CONN and LNK_SPAN, so disk block
  devices are advertised through the spanning tree.  Nothing else is
  implemented yet.

13 files changed:
lib/libdmsg/dmsg.h
lib/libdmsg/msg_lnk.c
lib/libdmsg/service.c
sbin/hammer2/cmd_service.c
sbin/hammer2/hammer2.h
sys/conf/files
sys/kern/kern_dmsg.c
sys/kern/subr_disk.c
sys/kern/subr_diskiocom.c [new file with mode: 0644]
sys/sys/disk.h
sys/sys/diskslice.h
sys/sys/dmsg.h
sys/vfs/hammer2/hammer2_vfsops.c

index ade8ed4..0a0acf4 100644 (file)
@@ -321,7 +321,9 @@ struct crypto_algo {
 struct dmsg_master_service_info {
        int     fd;             /* descriptor serviced by dmsg_master_service(); it closes fd on exit */
        int     detachme;       /* NOTE(review): presumably asks the service thread to detach itself -- confirm */
+       void    *handle;        /* opaque argument handed to exit_callback */
        void    (*dbgmsg_callback)(dmsg_msg_t *msg);    /* NOTE(review): presumably receives debug messages -- confirm in service.c */
+       void    (*exit_callback)(void *handle); /* optional; called by dmsg_master_service() after fd is closed, before info is freed */
 };
 
 typedef struct dmsg_master_service_info dmsg_master_service_info_t;
index 6a48c6b..67cc7a9 100644 (file)
@@ -246,8 +246,9 @@ struct h2span_node {
        RB_ENTRY(h2span_node) rbnode;
        struct h2span_link_tree tree;
        struct h2span_cluster *cls;
+       uint8_t peer_type;
        uuid_t  pfs_fsid;               /* unique fsid */
-       char label[64];
+       char    label[64];
 };
 
 struct h2span_link {
@@ -306,7 +307,16 @@ static
 int
 h2span_node_cmp(h2span_node_t *node1, h2span_node_t *node2)
 {
-       return(uuid_compare(&node1->pfs_fsid, &node2->pfs_fsid, NULL));
+       int r;
+
+       if (node1->peer_type < node2->peer_type)
+               return(-1);
+       if (node1->peer_type > node2->peer_type)
+               return(1);
+       r = uuid_compare(&node1->pfs_fsid, &node2->pfs_fsid, NULL);
+       if (r == 0 && node1->peer_type == DMSG_PEER_BLOCK)
+               r = strcmp(node1->label, node2->label);
+       return (r);
 }
 
 /*
@@ -651,15 +661,19 @@ dmsg_lnk_span(dmsg_msg_t *msg)
                 * Find the node
                 */
                dummy_node.pfs_fsid = msg->any.lnk_span.pfs_fsid;
+               dummy_node.peer_type = msg->any.lnk_span.peer_type;
+               snprintf(dummy_node.label, sizeof(dummy_node.label),
+                        "%s", msg->any.lnk_span.label);
                node = RB_FIND(h2span_node_tree, &cls->tree, &dummy_node);
                if (node == NULL) {
                        node = dmsg_alloc(sizeof(*node));
                        node->pfs_fsid = msg->any.lnk_span.pfs_fsid;
+                       node->peer_type = msg->any.lnk_span.peer_type;
+                       snprintf(node->label, sizeof(node->label),
+                                "%s", msg->any.lnk_span.label);
                        node->cls = cls;
                        RB_INIT(&node->tree);
                        RB_INSERT(h2span_node_tree, &cls->tree, node);
-                       snprintf(node->label, sizeof(node->label),
-                                "%s", msg->any.lnk_span.label);
                }
 
                /*
index 58509bd..387b1cb 100644 (file)
@@ -66,6 +66,8 @@ dmsg_master_service(void *data)
                info->fd, iocom.ioq_rx.error, iocom.ioq_tx.error);
        close(info->fd);
        info->fd = -1;  /* safety */
+       if (info->exit_callback)
+               info->exit_callback(info->handle);
        free(info);
 
        return (NULL);
index 5ee6406..de396e8 100644 (file)
 
 #include "hammer2.h"
 
+struct diskcon {               /* one record per disk that disk_reconnect() has attached */
+       TAILQ_ENTRY(diskcon) entry;     /* linkage on diskconq; list is walked/modified under diskmtx */
+       char    *disk;                  /* device name; strdup'd by disk_reconnect(), freed by disk_disconnect() */
+};
+
+#define WS " \r\n"     /* delimiters handed to strtok() when splitting the disk list */
+
+TAILQ_HEAD(, diskcon) diskconq = TAILQ_HEAD_INITIALIZER(diskconq);     /* disks currently connected */
+pthread_mutex_t diskmtx;       /* protects diskconq.
+                                * NOTE(review): no initializer is visible here; confirm that
+                                * pthread_mutex_init() is called elsewhere or that a zeroed
+                                * mutex is valid on this platform. */
+
 static void *service_thread(void *data);
 static void *udev_thread(void *data);
 static void master_reconnect(const char *mntpt);
+static void disk_reconnect(const char *disk);
+static void disk_disconnect(void *handle);
 static void udev_check_disks(void);
 
 /*
@@ -210,6 +222,7 @@ udev_check_disks(void)
 {
        char tmpbuf[1024];
        char *buf = NULL;
+       char *disk;
        int error;
        size_t n;
 
@@ -236,6 +249,9 @@ udev_check_disks(void)
        }
        if (buf) {
                fprintf(stderr, "DISKS: %s\n", buf);
+               for (disk = strtok(buf, WS); disk; disk = strtok(NULL, WS)) {
+                       disk_reconnect(disk);
+               }
                if (buf != tmpbuf)
                        free(buf);
        }
@@ -293,3 +309,98 @@ master_reconnect(const char *mntpt)
        info->dbgmsg_callback = hammer2_shell_parse;
        pthread_create(&thread, NULL, dmsg_master_service, info);
 }
+
+/*
+ * Reconnect a physical disk to the mesh.
+ *
+ * disk is a device name relative to /dev.  If the disk is not already
+ * listed on diskconq, /dev/<disk> is opened, a pipe is created, and one
+ * end of the pipe is handed to the kernel disk driver with DIOCRECLUSTER.
+ * The other end is serviced by a new dmsg_master_service() thread.  A
+ * struct diskcon is queued on diskconq so repeated calls for the same
+ * disk are no-ops; disk_disconnect() (the thread's exit_callback) removes
+ * it again.
+ *
+ * Every failure is reported on stderr and leaves no descriptor, memory
+ * or diskconq entry behind, so the disk is retried on the next call.
+ */
+static
+void
+disk_reconnect(const char *disk)
+{
+       struct disk_ioc_recluster recls;
+       struct diskcon *dc;
+       dmsg_master_service_info_t *info = NULL;
+       pthread_t thread;
+       int fd;
+       int pipefds[2];
+       char *path;
+
+       /*
+        * Urm, this will auto-create mdX+1, just ignore for now.
+        * This mechanic needs to be fixed.  It might actually be nice
+        * to be able to export md disks.
+        */
+       if (strncmp(disk, "md", 2) == 0)
+               return;
+
+       /*
+        * Check if already connected
+        */
+       pthread_mutex_lock(&diskmtx);
+       TAILQ_FOREACH(dc, &diskconq, entry) {
+               if (strcmp(dc->disk, disk) == 0)
+                       break;
+       }
+       pthread_mutex_unlock(&diskmtx);
+       if (dc)
+               return;
+
+       /*
+        * Not already connected, create a connection to the kernel
+        * disk driver.  asprintf() leaves path undefined on failure, so
+        * its result must be checked before path is used or freed.
+        */
+       if (asprintf(&path, "/dev/%s", disk) < 0) {
+               fprintf(stderr, "reconnect %s: out of memory\n", disk);
+               return;
+       }
+       fd = open(path, O_RDONLY);
+       free(path);
+       if (fd < 0) {
+               fprintf(stderr, "reconnect %s: no access to disk\n", disk);
+               return;
+       }
+
+       /*
+        * Allocate all bookkeeping before touching the kernel so that an
+        * allocation failure cannot leave a half-attached disk behind.
+        */
+       dc = calloc(1, sizeof(*dc));
+       info = calloc(1, sizeof(*info));
+       if (dc)
+               dc->disk = strdup(disk);
+       if (dc == NULL || dc->disk == NULL || info == NULL) {
+               fprintf(stderr, "reconnect %s: out of memory\n", disk);
+               close(fd);
+               goto fail;
+       }
+
+       if (pipe(pipefds) < 0) {
+               fprintf(stderr, "reconnect %s: pipe() failed\n", disk);
+               close(fd);
+               goto fail;
+       }
+       bzero(&recls, sizeof(recls));
+       recls.fd = pipefds[0];
+       if (ioctl(fd, DIOCRECLUSTER, &recls) < 0) {
+               fprintf(stderr, "reconnect %s: ioctl failed\n", disk);
+               close(pipefds[0]);
+               close(pipefds[1]);
+               close(fd);
+               goto fail;
+       }
+
+       /*
+        * Our copies of the handed-over pipe end and of the disk
+        * descriptor are no longer needed once the ioctl has succeeded.
+        */
+       close(pipefds[0]);
+       close(fd);
+
+       /*
+        * Queue the record before starting the thread: the thread's
+        * exit_callback removes it from diskconq.
+        */
+       pthread_mutex_lock(&diskmtx);
+       TAILQ_INSERT_TAIL(&diskconq, dc, entry);
+       pthread_mutex_unlock(&diskmtx);
+
+       info->fd = pipefds[1];
+       info->detachme = 1;
+       info->dbgmsg_callback = hammer2_shell_parse;
+       info->exit_callback = disk_disconnect;
+       info->handle = dc;
+       if (pthread_create(&thread, NULL, dmsg_master_service, info) != 0) {
+               /*
+                * No thread will ever run disk_disconnect() for this
+                * record, so undo the registration here.
+                */
+               fprintf(stderr, "reconnect %s: pthread_create failed\n", disk);
+               pthread_mutex_lock(&diskmtx);
+               TAILQ_REMOVE(&diskconq, dc, entry);
+               pthread_mutex_unlock(&diskmtx);
+               close(pipefds[1]);
+               goto fail;
+       }
+       return;
+
+fail:
+       if (dc)
+               free(dc->disk);
+       free(dc);
+       free(info);
+}
+
+/*
+ * exit_callback installed by disk_reconnect().  handle is the struct
+ * diskcon that was queued for the disk; it is unlinked from diskconq
+ * under diskmtx and released together with its name string, so a later
+ * disk_reconnect() for the same disk is no longer short-circuited.
+ */
+static
+void
+disk_disconnect(void *handle)
+{
+       struct diskcon *conn;
+
+       conn = handle;
+       fprintf(stderr, "DISK_DISCONNECT %s\n", conn->disk);
+
+       pthread_mutex_lock(&diskmtx);
+       TAILQ_REMOVE(&diskconq, conn, entry);
+       pthread_mutex_unlock(&diskmtx);
+
+       free(conn->disk);
+       free(conn);
+}
index 95d473e..7e7c439 100644 (file)
@@ -38,6 +38,7 @@
  */
 #include <sys/types.h>
 #include <sys/uio.h>
+#include <sys/queue.h>
 #include <sys/mount.h>
 #include <sys/file.h>
 #include <sys/socket.h>
@@ -47,6 +48,7 @@
 #include <sys/endian.h>
 #include <sys/sysctl.h>
 #include <sys/udev.h>
+#include <sys/diskslice.h>
 #include <dmsg.h>
 
 #include <netinet/in.h>
index fdf3c43..a612110 100644 (file)
@@ -899,6 +899,7 @@ kern/subr_bus.c             standard
 kern/subr_busdma.c     standard
 kern/subr_devstat.c    standard
 kern/subr_disk.c       standard
+kern/subr_diskiocom.c  standard
 kern/subr_disklabel32.c        standard
 kern/subr_disklabel64.c        standard
 kern/subr_diskslice.c  standard
index 0bf8723..a6572f8 100644 (file)
@@ -63,7 +63,6 @@ static void kdmsg_iocom_thread_wr(void *arg);
 void
 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
                 struct malloc_type *mmsg,
-                void (*cctl_wakeup)(kdmsg_iocom_t *),
                 int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
                 int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
                 int (*misc_rcvmsg)(kdmsg_msg_t *msg))
@@ -71,7 +70,6 @@ kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
        bzero(iocom, sizeof(*iocom));
        iocom->handle = handle;
        iocom->mmsg = mmsg;
-       iocom->clusterctl_wakeup = cctl_wakeup;
        iocom->lnk_rcvmsg = lnk_rcvmsg;
        iocom->dbg_rcvmsg = dbg_rcvmsg;
        iocom->misc_rcvmsg = misc_rcvmsg;
@@ -121,6 +119,31 @@ kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
                    NULL, 0, -1, "%s-msgwr", subsysname);
 }
 
+/*
+ * Disconnect and clean up
+ *
+ * Sets KDMSG_CLUSTERCTL_KILL in iocom->msg_ctl, loops until both
+ * iocom->msgrd_td and iocom->msgwr_td are clear, then drops the hold on
+ * iocom->msg_fp if one is present.  If no threads and no descriptor are
+ * attached, the loop and the fdrop are both skipped.
+ *
+ * NOTE(review): this function only polls the two thread pointers;
+ * presumably the reader/writer threads clear them on exit -- that code
+ * is not visible here.
+ */
+void
+kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
+{
+       /*
+        * Ask the cluster controller to go away
+        */
+       atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
+
+       while (iocom->msgrd_td || iocom->msgwr_td) {
+               wakeup(&iocom->msg_ctl);        /* kick anything sleeping on msg_ctl so it can notice KILL */
+               tsleep(iocom, 0, "clstrkl", hz);        /* then wait on iocom (hz timeout) and re-check */
+       }
+
+       /*
+        * Drop communications descriptor
+        */
+       if (iocom->msg_fp) {
+               fdrop(iocom->msg_fp);
+               iocom->msg_fp = NULL;   /* prevents a second fdrop on a repeated call */
+       }
+}
+
 /*
  * Cluster controller thread.  Perform messaging functions.  We have one
  * thread for the reader and one for the writer.  The writer handles
@@ -1221,7 +1244,13 @@ kdmsg_msg_write(kdmsg_msg_t *msg)
        msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
 
        TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
-       iocom->clusterctl_wakeup(iocom);
+
+       if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
+               atomic_clear_int(&iocom->msg_ctl,
+                                KDMSG_CLUSTERCTL_SLEEPING);
+               wakeup(&iocom->msg_ctl);
+       }
+
        lockmgr(&iocom->msglk, LK_RELEASE);
 }
 
index e8f40cf..a83832f 100644 (file)
@@ -183,9 +183,8 @@ disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe)
        int sno;
        u_int i;
 
-       disk_debug(2,
-                   "disk_probe_slice (begin): %s (%s)\n",
-                       dev->si_name, dp->d_cdev->si_name);
+       disk_debug(2, "disk_probe_slice (begin): %s (%s)\n",
+                  dev->si_name, dp->d_cdev->si_name);
 
        sno = slice ? slice - 1 : 0;
 
@@ -475,6 +474,7 @@ disk_msg_core(void *arg)
                        disk_debug(1,
                                    "DISK_DISK_PROBE: %s\n",
                                        dp->d_cdev->si_name);
+                       disk_iocom_update(dp);
                        disk_probe(dp, 0);
                        break;
                case DISK_DISK_DESTROY:
@@ -482,6 +482,7 @@ disk_msg_core(void *arg)
                        disk_debug(1,
                                    "DISK_DISK_DESTROY: %s\n",
                                        dp->d_cdev->si_name);
+                       disk_iocom_uninit(dp);
                        devfs_destroy_related(dp->d_cdev);
                        destroy_dev(dp->d_cdev);
                        destroy_only_dev(dp->d_rawdev);
@@ -691,8 +692,10 @@ _disk_create_named(const char *name, int unit, struct disk *dp,
        LIST_INSERT_HEAD(&disklist, dp, d_list);
        lwkt_reltoken(&disklist_token);
 
+       disk_iocom_init(dp);
+
        disk_debug(1, "disk_create (end): %s%d\n",
-           (name != NULL)?(name):(raw_ops->head.name), unit);
+                  (name != NULL)?(name):(raw_ops->head.name), unit);
 
        return (dp->d_rawdev);
 }
@@ -1065,6 +1068,12 @@ diskioctl(struct dev_ioctl_args *ap)
                return disk_dumpconf(dev, u);
        }
 
+       if (ap->a_cmd == DIOCRECLUSTER && dev == dp->d_cdev) {
+               kprintf("RECLUSTER\n");
+               error = disk_iocom_ioctl(dp, ap->a_cmd, ap->a_data);
+               return error;
+       }
+
        if (&dp->d_slice == NULL || dp->d_slice == NULL ||
            ((dp->d_info.d_dsflags & DSO_DEVICEMAPPER) &&
             dkslice(dev) == WHOLE_DISK_SLICE)) {
diff --git a/sys/kern/subr_diskiocom.c b/sys/kern/subr_diskiocom.c
new file mode 100644 (file)
index 0000000..2c6bfff
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/disklabel.h>
+#include <sys/disklabel32.h>
+#include <sys/disklabel64.h>
+#include <sys/diskslice.h>
+#include <sys/diskmbr.h>
+#include <sys/disk.h>
+#include <sys/malloc.h>
+#include <sys/device.h>
+#include <sys/devfs.h>
+#include <sys/thread.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/uuid.h>
+
+#include <sys/dmsg.h>
+
+#include <sys/buf2.h>
+#include <sys/mplock2.h>
+#include <sys/msgport2.h>
+#include <sys/thread2.h>
+
+static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");
+
+static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
+static int disk_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
+static int disk_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
+
+/*
+ * Initialize the messaging state embedded in a disk (dp->d_iocom).
+ * The disk itself is registered as the iocom handle, messages are
+ * allocated from M_DMSG_DISK, and the three receive callbacks are the
+ * disk_*_rcvmsg / disk_adhoc_input handlers in this file.
+ */
+void
+disk_iocom_init(struct disk *dp)
+{
+       kdmsg_iocom_t *iocom = &dp->d_iocom;
+
+       kdmsg_iocom_init(iocom, dp, M_DMSG_DISK,
+                        disk_lnk_rcvmsg, disk_dbg_rcvmsg, disk_adhoc_input);
+}
+
+void
+disk_iocom_update(struct disk *dp)
+{      /* intentionally empty for now; dp is unused */
+}
+
+/*
+ * Shut down the messaging state embedded in a disk by passing
+ * dp->d_iocom to kdmsg_iocom_uninit().
+ */
+void
+disk_iocom_uninit(struct disk *dp)
+{
+       kdmsg_iocom_t *iocom = &dp->d_iocom;
+
+       kdmsg_iocom_uninit(iocom);
+}
+
+/*
+ * Handle cluster-related ioctls issued on a disk.
+ *
+ * DIOCRECLUSTER: data points to a struct disk_ioc_recluster whose fd is
+ * looked up in the calling process's descriptor table; the resulting
+ * file is passed to disk_iocom_reconnect() and its result is returned.
+ * EINVAL is returned when the descriptor cannot be resolved.
+ *
+ * Any other command returns EOPNOTSUPP.
+ */
+int
+disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
+{
+       struct disk_ioc_recluster *recl;
+       struct file *fp;
+       int error = EOPNOTSUPP;
+
+       switch(cmd) {
+       case DIOCRECLUSTER:
+               recl = data;
+               fp = holdfp(curproc->p_fd, recl->fd, -1);
+               error = (fp != NULL) ? disk_iocom_reconnect(dp, fp) : EINVAL;
+               break;
+       default:
+               break;
+       }
+       return error;
+}
+
+/*
+ * Attach the communications descriptor fp to the disk's iocom and open
+ * a LNK_CONN transaction that announces the disk as a DMSG_PEER_BLOCK
+ * peer labelled "<hostname>/<driver><unit>".  Replies are routed to
+ * disk_msg_conn_reply().  Always returns 0.
+ */
+static
+int
+disk_iocom_reconnect(struct disk *dp, struct file *fp)
+{
+       kdmsg_iocom_t *iocom = &dp->d_iocom;
+       kdmsg_msg_t *conn;
+       char name[64];
+
+       /* "<driver><unit>"; also passed on as the subsystem name */
+       ksnprintf(name, sizeof(name), "%s%d",
+                 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
+       kdmsg_iocom_reconnect(iocom, fp, name);
+
+       conn = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_CONN | DMSGF_CREATE,
+                              disk_msg_conn_reply, dp);
+       conn->any.lnk_conn.peer_type = DMSG_PEER_BLOCK;
+       conn->any.lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
+       conn->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
+       conn->any.lnk_conn.pfs_type = 0;
+       ksnprintf(conn->any.lnk_conn.label, sizeof(conn->any.lnk_conn.label),
+                 "%s/%s", hostname, name);
+
+       /* remember the open transaction before the message is queued */
+       iocom->conn_state = conn->state;
+       kdmsg_msg_write(conn);
+
+       return (0);
+}
+
+/*
+ * Callback for messages arriving on our LNK_CONN transaction.
+ *
+ * A message carrying DMSGF_CREATE is the reply that opens the
+ * transaction (indicating LNK_SPAN support); we answer by opening a
+ * LNK_SPAN transaction advertising this disk as a DMSG_PEER_BLOCK peer.
+ *
+ * A message carrying DMSGF_DELETE, when we have not transmitted a
+ * DELETE ourselves, means the remote ended the connection: conn_state
+ * is cleared and a reply is sent.
+ *
+ * Always returns 0.
+ */
+static
+int
+disk_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
+{
+       struct disk *dp = state->any.any;
+       kdmsg_msg_t *span;
+
+       if (msg->any.head.cmd & DMSGF_CREATE) {
+               kprintf("DISK LNK_CONN received reply\n");
+               span = kdmsg_msg_alloc(&dp->d_iocom.router,
+                                      DMSG_LNK_SPAN | DMSGF_CREATE,
+                                      disk_msg_span_reply, dp);
+               span->any.lnk_span.peer_type = DMSG_PEER_BLOCK;
+               span->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;
+               span->any.lnk_span.pfs_type = 0;
+
+               ksnprintf(span->any.lnk_span.label,
+                         sizeof(span->any.lnk_span.label),
+                         "%s/%s%d",
+                         hostname,
+                         dev_dname(dp->d_rawdev),
+                         dkunit(dp->d_rawdev));
+               kdmsg_msg_write(span);
+       }
+
+       /* nothing more to do if our side already sent its DELETE */
+       if (state->txcmd & DMSGF_DELETE)
+               return(0);
+
+       if (msg->any.head.cmd & DMSGF_DELETE) {
+               kprintf("DISK LNK_CONN terminated by remote\n");
+               dp->d_iocom.conn_state = NULL;
+               kdmsg_msg_reply(msg, 0);
+       }
+       return(0);
+}
+
+/*
+ * Callback for messages on our LNK_SPAN transaction.  The transaction
+ * is normally left open; a reply is sent only when the remote sends
+ * DMSGF_DELETE and we have not transmitted a DELETE ourselves.
+ * Always returns 0.
+ */
+static
+int
+disk_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
+{
+       kprintf("DISK LNK_SPAN reply received\n");
+
+       if (state->txcmd & DMSGF_DELETE)
+               return (0);
+       if (msg->any.head.cmd & DMSGF_DELETE)
+               kdmsg_msg_reply(msg, 0);
+
+       return (0);
+}
+
+/*
+ * LNK message handler registered by disk_iocom_init().
+ *
+ * Dispatches on the command masked with DMSGF_TRANSMASK:
+ *   LNK_CONN|CREATE - logged, then kdmsg_msg_result(msg, 0) (reply and
+ *                     leave the transaction open)
+ *   LNK_SPAN|CREATE - logged only
+ *   LNK_SPAN|DELETE - logged only
+ * Anything else is ignored.  Always returns 0.
+ */
+int
+disk_lnk_rcvmsg(kdmsg_msg_t *msg)
+{
+       switch(msg->any.head.cmd & DMSGF_TRANSMASK) {
+       case DMSG_LNK_SPAN | DMSGF_CREATE:
+               kprintf("DISK SPAN RECEIVE - ADDED FROM CLUSTER\n");
+               break;
+       case DMSG_LNK_SPAN | DMSGF_DELETE:
+               kprintf("DISK SPAN RECEIVE - DELETED FROM CLUSTER\n");
+               break;
+       case DMSG_LNK_CONN | DMSGF_CREATE:
+               kprintf("DISK CONN RECEIVE - (just ignore it)\n");
+               kdmsg_msg_result(msg, 0);
+               break;
+       default:
+               break;
+       }
+
+       return (0);
+}
+
+/*
+ * DBG message handler registered by disk_iocom_init().
+ *
+ * Dispatches on the command masked with DMSGF_CMDSWMASK:
+ *   DBG_SHELL        - shell commands are not supported; replies with
+ *                      DMSG_ERR_NOSUPP
+ *   DBG_SHELL|REPLY  - prints the auxiliary data as a string on the
+ *                      console
+ * Anything else is answered with DMSG_ERR_NOSUPP.  Always returns 0.
+ */
+int
+disk_dbg_rcvmsg(kdmsg_msg_t *msg)
+{
+       /*struct disk *dp = msg->router->iocom->handle;*/
+
+       switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
+       case DMSG_DBG_SHELL:
+               /*
+                * Execute shell command (not supported atm)
+                */
+               kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
+               break;
+       case DMSG_DBG_SHELL | DMSGF_REPLY:
+               /*
+                * Force NUL termination before printing.  The size check
+                * keeps aux_size - 1 from indexing before the buffer if
+                * an empty payload ever arrives with a non-NULL pointer.
+                */
+               if (msg->aux_data && msg->aux_size > 0) {
+                       msg->aux_data[msg->aux_size - 1] = 0;
+                       kprintf("DEBUGMSG: %s\n", msg->aux_data);
+               }
+               break;
+       default:
+               kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * Third receive callback registered by disk_iocom_init().  Only logs
+ * the owning disk's "<driver><unit>" name; the message itself is not
+ * examined.  Always returns 0.
+ */
+int
+disk_adhoc_input(kdmsg_msg_t *msg)
+{
+       struct disk *owner;
+
+       owner = msg->router->iocom->handle;
+       kprintf("DISK ADHOC INPUT %s%d\n",
+               dev_dname(owner->d_rawdev), dkunit(owner->d_rawdev));
+
+       return (0);
+}
index 1dfdff9..3aa58c0 100644 (file)
@@ -58,6 +58,9 @@
 #ifndef _SYS_MSGPORT_H_
 #include <sys/msgport.h>
 #endif
+#ifndef _SYS_DMSG_H_
+#include <sys/dmsg.h>
+#endif
 
 /*
  * Media information structure - filled in by the media driver.
@@ -142,6 +145,7 @@ struct disk {
        struct dsched_policy    *d_sched_policy;/* I/O scheduler policy */
        const char              *d_disktype;    /* Disk type information */
        LIST_ENTRY(disk)        d_list;
+       kdmsg_iocom_t           d_iocom;        /* cluster import/export */
 };
 
 /*
@@ -173,6 +177,15 @@ void disk_config(void *);
 
 int bounds_check_with_mediasize(struct bio *bio, int secsize, uint64_t mediasize);
 
+void disk_iocom_init(struct disk *dp);         /* set up dp->d_iocom and its receive callbacks */
+void disk_iocom_update(struct disk *dp);       /* currently an empty function */
+void disk_iocom_uninit(struct disk *dp);       /* tear down dp->d_iocom */
+int disk_iocom_ioctl(struct disk *dp, int cmd, void *data);    /* handles DIOCRECLUSTER; EOPNOTSUPP otherwise */
+void disk_clusterctl_wakeup(kdmsg_iocom_t *iocom);     /* NOTE(review): no definition appears in this change and
+                                                        * kdmsg_iocom_init() no longer takes a wakeup callback;
+                                                        * possibly a stale prototype -- confirm */
+int disk_lnk_rcvmsg(kdmsg_msg_t *msg);         /* LNK message callback */
+int disk_dbg_rcvmsg(kdmsg_msg_t *msg);         /* DBG message callback */
+int disk_adhoc_input(kdmsg_msg_t *msg);        /* callback for remaining messages; only logs */
+
 typedef struct disk_msg {
        struct lwkt_msg hdr;
        void    *load;
@@ -184,7 +197,7 @@ typedef struct disk_msg {
 #define DISK_SLICE_REPROBE     0x03
 #define DISK_DISK_REPROBE      0x04
 #define DISK_UNPROBE           0x05
-#define DISK_SYNC                      0x99
+#define DISK_SYNC              0x99
 
 
 #endif /* _KERNEL */
index 415af6f..8e11c70 100644 (file)
@@ -94,7 +94,8 @@
 #define DIOCWLABEL             _IOW('d', 109, int)
 #define        DIOCGSLICEINFO          _IOR('d', 111, struct diskslices)
 #define        DIOCSYNCSLICEINFO       _IOW('d', 112, int)
-#define DIOCGKERNELDUMP                _IOW('d', 133, u_int)   /* Set/Clear kernel dumps */
+#define DIOCGKERNELDUMP                _IOW('d', 133, u_int)   /* Set/Clear dumps */
+#define DIOCRECLUSTER          _IOWR('d', 134, struct disk_ioc_recluster)
 #define        MAX_SLICES              16
 
 /*
@@ -167,6 +168,10 @@ struct diskslices {
                dss_slices[MAX_SLICES]; /* actually usually less */
 };
 
+struct disk_ioc_recluster {    /* argument block for the DIOCRECLUSTER ioctl */
+       int     fd;             /* descriptor in the calling process that the disk should use for cluster messaging */
+};
+
 /*
  * DIOCGPART ioctl - returns information about a disk, slice, or partition.
  * This ioctl is primarily used to get the block size and media size.
index bd077f0..d0deba1 100644 (file)
@@ -746,7 +746,6 @@ struct kdmsg_iocom {
        struct lock             msglk;          /* lockmgr lock */
        TAILQ_HEAD(, kdmsg_msg) msgq;           /* transmit queue */
        void                    *handle;
-       void                    (*clusterctl_wakeup)(struct kdmsg_iocom *);
        int                     (*lnk_rcvmsg)(kdmsg_msg_t *msg);
        int                     (*dbg_rcvmsg)(kdmsg_msg_t *msg);
        int                     (*misc_rcvmsg)(kdmsg_msg_t *msg);
@@ -769,13 +768,12 @@ uint32_t kdmsg_icrc32c(const void *buf, size_t size, uint32_t crc);
 void kdmsg_iocom_init(kdmsg_iocom_t *iocom,
                        void *handle,
                        struct malloc_type *mmsg,
-                       void (*cctl_wakeup)(kdmsg_iocom_t *),
                        int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
                        int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
                        int (*misc_rcvmsg)(kdmsg_msg_t *msg));
-
 void kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
                        const char *subsysname);
+void kdmsg_iocom_uninit(kdmsg_iocom_t *iocom);
 void kdmsg_drain_msgq(kdmsg_iocom_t *iocom);
 
 int kdmsg_state_msgrx(kdmsg_msg_t *msg);
index 7890c06..b8c6bdd 100644 (file)
@@ -355,7 +355,6 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
 
        kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
        kdmsg_iocom_init(&pmp->iocom, pmp, pmp->mmsg,
-                        hammer2_clusterctl_wakeup,
                         hammer2_msg_lnk_rcvmsg,
                         hammer2_msg_dbg_rcvmsg,
                         hammer2_msg_adhoc_input);
@@ -593,21 +592,9 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
        ccms_domain_uninit(&pmp->ccms_dom);
 
        /*
-        * Ask the cluster controller to go away
+        * Kill cluster controller
         */
-       atomic_set_int(&pmp->iocom.msg_ctl, KDMSG_CLUSTERCTL_KILL);
-       while (pmp->iocom.msgrd_td || pmp->iocom.msgwr_td) {
-               wakeup(&pmp->iocom.msg_ctl);
-               tsleep(pmp, 0, "clstrkl", hz);
-       }
-
-       /*
-        * Drop communications descriptor
-        */
-       if (pmp->iocom.msg_fp) {
-               fdrop(pmp->iocom.msg_fp);
-               pmp->iocom.msg_fp = NULL;
-       }
+       kdmsg_iocom_uninit(&pmp->iocom);
 
        /*
         * If no PFS's left drop the master hammer2_mount for the device.
@@ -1047,23 +1034,6 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        kdmsg_msg_write(msg);
 }
 
-/*
- * Called with msglk held after queueing a new message, wakes up the
- * transmit thread.  We use an interlock thread to avoid unnecessary
- * wakeups.
- */
-void
-hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom)
-{
-       hammer2_pfsmount_t *pmp = iocom->handle;
-
-       if (pmp->iocom.msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
-               atomic_clear_int(&pmp->iocom.msg_ctl,
-                                KDMSG_CLUSTERCTL_SLEEPING);
-               wakeup(&pmp->iocom.msg_ctl);
-       }
-}
-
 static int
 hammer2_msg_lnk_rcvmsg(kdmsg_msg_t *msg)
 {