From 185ace9331437cc6a4344ddda4153972049dbc86 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 25 Oct 2012 16:00:47 -0700 Subject: [PATCH] hammer2 - Add server-side disk advertisements * The hammer2 service daemon now tracks disks and connects the cluster controller to each one. * Add a new ioctl to the subr_disk subsystem and add subr_diskiocom.c to implement it, associating a cluster controller messaging descriptor with a disk device. * The disk device initiates LNK_CONN and LNK_SPAN. Disk block devices are now advertised through the spanning tree. Nothing else is implemented yet. --- lib/libdmsg/dmsg.h | 2 + lib/libdmsg/msg_lnk.c | 22 ++- lib/libdmsg/service.c | 2 + sbin/hammer2/cmd_service.c | 111 ++++++++++++++ sbin/hammer2/hammer2.h | 2 + sys/conf/files | 1 + sys/kern/kern_dmsg.c | 35 ++++- sys/kern/subr_disk.c | 17 ++- sys/kern/subr_diskiocom.c | 252 +++++++++++++++++++++++++++++++ sys/sys/disk.h | 15 +- sys/sys/diskslice.h | 7 +- sys/sys/dmsg.h | 4 +- sys/vfs/hammer2/hammer2_vfsops.c | 34 +---- 13 files changed, 456 insertions(+), 48 deletions(-) create mode 100644 sys/kern/subr_diskiocom.c diff --git a/lib/libdmsg/dmsg.h b/lib/libdmsg/dmsg.h index ade8ed448c..0a0acf4d73 100644 --- a/lib/libdmsg/dmsg.h +++ b/lib/libdmsg/dmsg.h @@ -321,7 +321,9 @@ struct crypto_algo { struct dmsg_master_service_info { int fd; int detachme; + void *handle; void (*dbgmsg_callback)(dmsg_msg_t *msg); + void (*exit_callback)(void *handle); }; typedef struct dmsg_master_service_info dmsg_master_service_info_t; diff --git a/lib/libdmsg/msg_lnk.c b/lib/libdmsg/msg_lnk.c index 6a48c6bbd8..67cc7a9670 100644 --- a/lib/libdmsg/msg_lnk.c +++ b/lib/libdmsg/msg_lnk.c @@ -246,8 +246,9 @@ struct h2span_node { RB_ENTRY(h2span_node) rbnode; struct h2span_link_tree tree; struct h2span_cluster *cls; + uint8_t peer_type; uuid_t pfs_fsid; /* unique fsid */ - char label[64]; + char label[64]; }; struct h2span_link { @@ -306,7 +307,16 @@ static int h2span_node_cmp(h2span_node_t *node1, h2span_node_t
*node2) { - return(uuid_compare(&node1->pfs_fsid, &node2->pfs_fsid, NULL)); + int r; + + if (node1->peer_type < node2->peer_type) + return(-1); + if (node1->peer_type > node2->peer_type) + return(1); + r = uuid_compare(&node1->pfs_fsid, &node2->pfs_fsid, NULL); + if (r == 0 && node1->peer_type == DMSG_PEER_BLOCK) + r = strcmp(node1->label, node2->label); + return (r); } /* @@ -651,15 +661,19 @@ dmsg_lnk_span(dmsg_msg_t *msg) * Find the node */ dummy_node.pfs_fsid = msg->any.lnk_span.pfs_fsid; + dummy_node.peer_type = msg->any.lnk_span.peer_type; + snprintf(dummy_node.label, sizeof(dummy_node.label), + "%s", msg->any.lnk_span.label); node = RB_FIND(h2span_node_tree, &cls->tree, &dummy_node); if (node == NULL) { node = dmsg_alloc(sizeof(*node)); node->pfs_fsid = msg->any.lnk_span.pfs_fsid; + node->peer_type = msg->any.lnk_span.peer_type; + snprintf(node->label, sizeof(node->label), + "%s", msg->any.lnk_span.label); node->cls = cls; RB_INIT(&node->tree); RB_INSERT(h2span_node_tree, &cls->tree, node); - snprintf(node->label, sizeof(node->label), - "%s", msg->any.lnk_span.label); } /* diff --git a/lib/libdmsg/service.c b/lib/libdmsg/service.c index 58509bd60c..387b1cbcd6 100644 --- a/lib/libdmsg/service.c +++ b/lib/libdmsg/service.c @@ -66,6 +66,8 @@ dmsg_master_service(void *data) info->fd, iocom.ioq_rx.error, iocom.ioq_tx.error); close(info->fd); info->fd = -1; /* safety */ + if (info->exit_callback) + info->exit_callback(info->handle); free(info); return (NULL); diff --git a/sbin/hammer2/cmd_service.c b/sbin/hammer2/cmd_service.c index 5ee6406df1..de396e8ebc 100644 --- a/sbin/hammer2/cmd_service.c +++ b/sbin/hammer2/cmd_service.c @@ -35,9 +35,21 @@ #include "hammer2.h" +struct diskcon { + TAILQ_ENTRY(diskcon) entry; + char *disk; +}; + +#define WS " \r\n" + +TAILQ_HEAD(, diskcon) diskconq = TAILQ_HEAD_INITIALIZER(diskconq); +pthread_mutex_t diskmtx; + static void *service_thread(void *data); static void *udev_thread(void *data); static void master_reconnect(const 
char *mntpt); +static void disk_reconnect(const char *disk); +static void disk_disconnect(void *handle); static void udev_check_disks(void); /* @@ -210,6 +222,7 @@ udev_check_disks(void) { char tmpbuf[1024]; char *buf = NULL; + char *disk; int error; size_t n; @@ -236,6 +249,9 @@ udev_check_disks(void) } if (buf) { fprintf(stderr, "DISKS: %s\n", buf); + for (disk = strtok(buf, WS); disk; disk = strtok(NULL, WS)) { + disk_reconnect(disk); + } if (buf != tmpbuf) free(buf); } @@ -293,3 +309,98 @@ master_reconnect(const char *mntpt) info->dbgmsg_callback = hammer2_shell_parse; pthread_create(&thread, NULL, dmsg_master_service, info); } + +/* + * Reconnect a physical disk to the mesh. + */ +static +void +disk_reconnect(const char *disk) +{ + struct disk_ioc_recluster recls; + struct diskcon *dc; + dmsg_master_service_info_t *info; + pthread_t thread; + int fd; + int pipefds[2]; + char *path; + + /* + * Urm, this will auto-create mdX+1, just ignore for now. + * This mechanic needs to be fixed. It might actually be nice + * to be able to export md disks. + */ + if (strncmp(disk, "md", 2) == 0) + return; + + /* + * Check if already connected + */ + pthread_mutex_lock(&diskmtx); + TAILQ_FOREACH(dc, &diskconq, entry) { + if (strcmp(dc->disk, disk) == 0) + break; + } + pthread_mutex_unlock(&diskmtx); + if (dc) + return; + + /* + * Not already connected, create a connection to the kernel + * disk driver. 
+ */ + asprintf(&path, "/dev/%s", disk); + fd = open(path, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "reconnect %s: no access to disk\n", disk); + free(path); + return; + } + free(path); + if (pipe(pipefds) < 0) { + fprintf(stderr, "reconnect %s: pipe() failed\n", disk); + close(fd); + return; + } + bzero(&recls, sizeof(recls)); + recls.fd = pipefds[0]; + if (ioctl(fd, DIOCRECLUSTER, &recls) < 0) { + fprintf(stderr, "reconnect %s: ioctl failed\n", disk); + close(pipefds[0]); + close(pipefds[1]); + close(fd); + return; + } + close(pipefds[0]); + close(fd); + + dc = malloc(sizeof(*dc)); + dc->disk = strdup(disk); + pthread_mutex_lock(&diskmtx); + TAILQ_INSERT_TAIL(&diskconq, dc, entry); + pthread_mutex_unlock(&diskmtx); + + info = malloc(sizeof(*info)); + bzero(info, sizeof(*info)); + info->fd = pipefds[1]; + info->detachme = 1; + info->dbgmsg_callback = hammer2_shell_parse; + info->exit_callback = disk_disconnect; + info->handle = dc; + pthread_create(&thread, NULL, dmsg_master_service, info); +} + +static +void +disk_disconnect(void *handle) +{ + struct diskcon *dc = handle; + + fprintf(stderr, "DISK_DISCONNECT %s\n", dc->disk); + + pthread_mutex_lock(&diskmtx); + TAILQ_REMOVE(&diskconq, dc, entry); + pthread_mutex_unlock(&diskmtx); + free(dc->disk); + free(dc); +} diff --git a/sbin/hammer2/hammer2.h b/sbin/hammer2/hammer2.h index 95d473e34f..7e7c439001 100644 --- a/sbin/hammer2/hammer2.h +++ b/sbin/hammer2/hammer2.h @@ -38,6 +38,7 @@ */ #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/sys/conf/files b/sys/conf/files index fdf3c43380..a612110027 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -899,6 +899,7 @@ kern/subr_bus.c standard kern/subr_busdma.c standard kern/subr_devstat.c standard kern/subr_disk.c standard +kern/subr_diskiocom.c standard kern/subr_disklabel32.c standard kern/subr_disklabel64.c standard kern/subr_diskslice.c standard diff --git 
a/sys/kern/kern_dmsg.c b/sys/kern/kern_dmsg.c index 0bf8723077..a6572f8a23 100644 --- a/sys/kern/kern_dmsg.c +++ b/sys/kern/kern_dmsg.c @@ -63,7 +63,6 @@ static void kdmsg_iocom_thread_wr(void *arg); void kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, struct malloc_type *mmsg, - void (*cctl_wakeup)(kdmsg_iocom_t *), int (*lnk_rcvmsg)(kdmsg_msg_t *msg), int (*dbg_rcvmsg)(kdmsg_msg_t *msg), int (*misc_rcvmsg)(kdmsg_msg_t *msg)) @@ -71,7 +70,6 @@ kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, bzero(iocom, sizeof(*iocom)); iocom->handle = handle; iocom->mmsg = mmsg; - iocom->clusterctl_wakeup = cctl_wakeup; iocom->lnk_rcvmsg = lnk_rcvmsg; iocom->dbg_rcvmsg = dbg_rcvmsg; iocom->misc_rcvmsg = misc_rcvmsg; @@ -121,6 +119,31 @@ kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, NULL, 0, -1, "%s-msgwr", subsysname); } +/* + * Disconnect and clean up + */ +void +kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) +{ + /* + * Ask the cluster controller to go away + */ + atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); + + while (iocom->msgrd_td || iocom->msgwr_td) { + wakeup(&iocom->msg_ctl); + tsleep(iocom, 0, "clstrkl", hz); + } + + /* + * Drop communications descriptor + */ + if (iocom->msg_fp) { + fdrop(iocom->msg_fp); + iocom->msg_fp = NULL; + } +} + /* * Cluster controller thread. Perform messaging functions. We have one * thread for the reader and one for the writer. 
The writer handles @@ -1221,7 +1244,13 @@ kdmsg_msg_write(kdmsg_msg_t *msg) msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); - iocom->clusterctl_wakeup(iocom); + + if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { + atomic_clear_int(&iocom->msg_ctl, + KDMSG_CLUSTERCTL_SLEEPING); + wakeup(&iocom->msg_ctl); + } + lockmgr(&iocom->msglk, LK_RELEASE); } diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index e8f40cf3b9..a83832f483 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -183,9 +183,8 @@ disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe) int sno; u_int i; - disk_debug(2, - "disk_probe_slice (begin): %s (%s)\n", - dev->si_name, dp->d_cdev->si_name); + disk_debug(2, "disk_probe_slice (begin): %s (%s)\n", + dev->si_name, dp->d_cdev->si_name); sno = slice ? slice - 1 : 0; @@ -475,6 +474,7 @@ disk_msg_core(void *arg) disk_debug(1, "DISK_DISK_PROBE: %s\n", dp->d_cdev->si_name); + disk_iocom_update(dp); disk_probe(dp, 0); break; case DISK_DISK_DESTROY: @@ -482,6 +482,7 @@ disk_msg_core(void *arg) disk_debug(1, "DISK_DISK_DESTROY: %s\n", dp->d_cdev->si_name); + disk_iocom_uninit(dp); devfs_destroy_related(dp->d_cdev); destroy_dev(dp->d_cdev); destroy_only_dev(dp->d_rawdev); @@ -691,8 +692,10 @@ _disk_create_named(const char *name, int unit, struct disk *dp, LIST_INSERT_HEAD(&disklist, dp, d_list); lwkt_reltoken(&disklist_token); + disk_iocom_init(dp); + disk_debug(1, "disk_create (end): %s%d\n", - (name != NULL)?(name):(raw_ops->head.name), unit); + (name != NULL)?(name):(raw_ops->head.name), unit); return (dp->d_rawdev); } @@ -1065,6 +1068,12 @@ diskioctl(struct dev_ioctl_args *ap) return disk_dumpconf(dev, u); } + if (ap->a_cmd == DIOCRECLUSTER && dev == dp->d_cdev) { + kprintf("RECLUSTER\n"); + error = disk_iocom_ioctl(dp, ap->a_cmd, ap->a_data); + return error; + } + if (&dp->d_slice == NULL || dp->d_slice == NULL || ((dp->d_info.d_dsflags & DSO_DEVICEMAPPER) && 
dkslice(dev) == WHOLE_DISK_SLICE)) { diff --git a/sys/kern/subr_diskiocom.c b/sys/kern/subr_diskiocom.c new file mode 100644 index 0000000000..2c6bfff441 --- /dev/null +++ b/sys/kern/subr_diskiocom.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg"); + +static int disk_iocom_reconnect(struct disk *dp, struct file *fp); +static int disk_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); +static int disk_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); + +void +disk_iocom_init(struct disk *dp) +{ + kdmsg_iocom_init(&dp->d_iocom, dp, M_DMSG_DISK, + disk_lnk_rcvmsg, + disk_dbg_rcvmsg, + disk_adhoc_input); +} + +void +disk_iocom_update(struct disk *dp) +{ +} + +void +disk_iocom_uninit(struct disk *dp) +{ + kdmsg_iocom_uninit(&dp->d_iocom); +} + +int +disk_iocom_ioctl(struct disk *dp, int cmd, void *data) +{ + struct file *fp; + struct disk_ioc_recluster *recl; + int error; + + switch(cmd) { + case DIOCRECLUSTER: + recl = data; + fp = holdfp(curproc->p_fd, recl->fd, -1); + if (fp) { + error = disk_iocom_reconnect(dp, fp); + } else { + error = EINVAL; + } + break; + default: + error = EOPNOTSUPP; + break; + } + return error; +} + +static +int +disk_iocom_reconnect(struct disk *dp, struct file *fp) +{ + kdmsg_msg_t *msg; + char devname[64]; + + ksnprintf(devname, sizeof(devname), "%s%d", + dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev)); + + kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname); + + msg = kdmsg_msg_alloc(&dp->d_iocom.router, DMSG_LNK_CONN | DMSGF_CREATE, + disk_msg_conn_reply, dp); + msg->any.lnk_conn.pfs_type = 0; + msg->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1; + msg->any.lnk_conn.peer_type = DMSG_PEER_BLOCK; + msg->any.lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK; + + ksnprintf(msg->any.lnk_conn.label, sizeof(msg->any.lnk_conn.label), + "%s/%s", hostname, devname); + dp->d_iocom.conn_state = msg->state; + kdmsg_msg_write(msg); + + return (0); +} + +/* + * Received reply 
to our LNK_CONN transaction, indicating LNK_SPAN support. + * Issue LNK_SPAN. + */ +static +int +disk_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) +{ + struct disk *dp = state->any.any; + kdmsg_msg_t *rmsg; + + if (msg->any.head.cmd & DMSGF_CREATE) { + kprintf("DISK LNK_CONN received reply\n"); + rmsg = kdmsg_msg_alloc(&dp->d_iocom.router, + DMSG_LNK_SPAN | DMSGF_CREATE, + disk_msg_span_reply, dp); + rmsg->any.lnk_span.pfs_type = 0; + rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1; + rmsg->any.lnk_span.peer_type = DMSG_PEER_BLOCK; + + ksnprintf(rmsg->any.lnk_span.label, + sizeof(rmsg->any.lnk_span.label), + "%s/%s%d", + hostname, + dev_dname(dp->d_rawdev), + dkunit(dp->d_rawdev)); + kdmsg_msg_write(rmsg); + } + if ((state->txcmd & DMSGF_DELETE) == 0 && + (msg->any.head.cmd & DMSGF_DELETE)) { + kprintf("DISK LNK_CONN terminated by remote\n"); + dp->d_iocom.conn_state = NULL; + kdmsg_msg_reply(msg, 0); + } + return(0); +} + +/* + * Reply to our LNK_SPAN. The transaction is left open. 
+ */ +static +int +disk_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) +{ + /*struct disk *dp = state->any.any;*/ + + kprintf("DISK LNK_SPAN reply received\n"); + if ((state->txcmd & DMSGF_DELETE) == 0 && + (msg->any.head.cmd & DMSGF_DELETE)) { + kdmsg_msg_reply(msg, 0); + } + return (0); +} + +int +disk_lnk_rcvmsg(kdmsg_msg_t *msg) +{ + /*struct disk *dp = msg->router->iocom->handle;*/ + + switch(msg->any.head.cmd & DMSGF_TRANSMASK) { + case DMSG_LNK_CONN | DMSGF_CREATE: + /* + * reply & leave trans open + */ + kprintf("DISK CONN RECEIVE - (just ignore it)\n"); + kdmsg_msg_result(msg, 0); + break; + case DMSG_LNK_SPAN | DMSGF_CREATE: + kprintf("DISK SPAN RECEIVE - ADDED FROM CLUSTER\n"); + break; + case DMSG_LNK_SPAN | DMSGF_DELETE: + kprintf("DISK SPAN RECEIVE - DELETED FROM CLUSTER\n"); + break; + default: + break; + } + return (0); +} + +int +disk_dbg_rcvmsg(kdmsg_msg_t *msg) +{ + /*struct disk *dp = msg->router->iocom->handle;*/ + + switch(msg->any.head.cmd & DMSGF_CMDSWMASK) { + case DMSG_DBG_SHELL: + /* + * Execute shell command (not supported atm) + */ + kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); + break; + case DMSG_DBG_SHELL | DMSGF_REPLY: + if (msg->aux_data) { + msg->aux_data[msg->aux_size - 1] = 0; + kprintf("DEBUGMSG: %s\n", msg->aux_data); + } + break; + default: + kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); + break; + } + return (0); +} + +int +disk_adhoc_input(kdmsg_msg_t *msg) +{ + struct disk *dp = msg->router->iocom->handle; + + kprintf("DISK ADHOC INPUT %s%d\n", + dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev)); + + return (0); +} diff --git a/sys/sys/disk.h b/sys/sys/disk.h index 1dfdff9ab1..3aa58c0e04 100644 --- a/sys/sys/disk.h +++ b/sys/sys/disk.h @@ -58,6 +58,9 @@ #ifndef _SYS_MSGPORT_H_ #include #endif +#ifndef _SYS_DMSG_H_ +#include +#endif /* * Media information structure - filled in by the media driver. 
@@ -142,6 +145,7 @@ struct disk { struct dsched_policy *d_sched_policy;/* I/O scheduler policy */ const char *d_disktype; /* Disk type information */ LIST_ENTRY(disk) d_list; + kdmsg_iocom_t d_iocom; /* cluster import/export */ }; /* @@ -173,6 +177,15 @@ void disk_config(void *); int bounds_check_with_mediasize(struct bio *bio, int secsize, uint64_t mediasize); +void disk_iocom_init(struct disk *dp); +void disk_iocom_update(struct disk *dp); +void disk_iocom_uninit(struct disk *dp); +int disk_iocom_ioctl(struct disk *dp, int cmd, void *data); +void disk_clusterctl_wakeup(kdmsg_iocom_t *iocom); +int disk_lnk_rcvmsg(kdmsg_msg_t *msg); +int disk_dbg_rcvmsg(kdmsg_msg_t *msg); +int disk_adhoc_input(kdmsg_msg_t *msg); + typedef struct disk_msg { struct lwkt_msg hdr; void *load; @@ -184,7 +197,7 @@ typedef struct disk_msg { #define DISK_SLICE_REPROBE 0x03 #define DISK_DISK_REPROBE 0x04 #define DISK_UNPROBE 0x05 -#define DISK_SYNC 0x99 +#define DISK_SYNC 0x99 #endif /* _KERNEL */ diff --git a/sys/sys/diskslice.h b/sys/sys/diskslice.h index 415af6f5f0..8e11c7065a 100644 --- a/sys/sys/diskslice.h +++ b/sys/sys/diskslice.h @@ -94,7 +94,8 @@ #define DIOCWLABEL _IOW('d', 109, int) #define DIOCGSLICEINFO _IOR('d', 111, struct diskslices) #define DIOCSYNCSLICEINFO _IOW('d', 112, int) -#define DIOCGKERNELDUMP _IOW('d', 133, u_int) /* Set/Clear kernel dumps */ +#define DIOCGKERNELDUMP _IOW('d', 133, u_int) /* Set/Clear dumps */ +#define DIOCRECLUSTER _IOWR('d', 134, struct disk_ioc_recluster) #define MAX_SLICES 16 /* @@ -167,6 +168,10 @@ struct diskslices { dss_slices[MAX_SLICES]; /* actually usually less */ }; +struct disk_ioc_recluster { + int fd; +}; + /* * DIOCGPART ioctl - returns information about a disk, slice, or partition. * This ioctl is primarily used to get the block size and media size. 
diff --git a/sys/sys/dmsg.h b/sys/sys/dmsg.h index bd077f086c..d0deba1d3c 100644 --- a/sys/sys/dmsg.h +++ b/sys/sys/dmsg.h @@ -746,7 +746,6 @@ struct kdmsg_iocom { struct lock msglk; /* lockmgr lock */ TAILQ_HEAD(, kdmsg_msg) msgq; /* transmit queue */ void *handle; - void (*clusterctl_wakeup)(struct kdmsg_iocom *); int (*lnk_rcvmsg)(kdmsg_msg_t *msg); int (*dbg_rcvmsg)(kdmsg_msg_t *msg); int (*misc_rcvmsg)(kdmsg_msg_t *msg); @@ -769,13 +768,12 @@ uint32_t kdmsg_icrc32c(const void *buf, size_t size, uint32_t crc); void kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, struct malloc_type *mmsg, - void (*cctl_wakeup)(kdmsg_iocom_t *), int (*lnk_rcvmsg)(kdmsg_msg_t *msg), int (*dbg_rcvmsg)(kdmsg_msg_t *msg), int (*misc_rcvmsg)(kdmsg_msg_t *msg)); - void kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, const char *subsysname); +void kdmsg_iocom_uninit(kdmsg_iocom_t *iocom); void kdmsg_drain_msgq(kdmsg_iocom_t *iocom); int kdmsg_state_msgrx(kdmsg_msg_t *msg); diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 7890c0654d..b8c6bdda39 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -355,7 +355,6 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); kdmsg_iocom_init(&pmp->iocom, pmp, pmp->mmsg, - hammer2_clusterctl_wakeup, hammer2_msg_lnk_rcvmsg, hammer2_msg_dbg_rcvmsg, hammer2_msg_adhoc_input); @@ -593,21 +592,9 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) ccms_domain_uninit(&pmp->ccms_dom); /* - * Ask the cluster controller to go away + * Kill cluster controller */ - atomic_set_int(&pmp->iocom.msg_ctl, KDMSG_CLUSTERCTL_KILL); - while (pmp->iocom.msgrd_td || pmp->iocom.msgwr_td) { - wakeup(&pmp->iocom.msg_ctl); - tsleep(pmp, 0, "clstrkl", hz); - } - - /* - * Drop communications descriptor - */ - if (pmp->iocom.msg_fp) { - fdrop(pmp->iocom.msg_fp); - pmp->iocom.msg_fp = NULL; - } + kdmsg_iocom_uninit(&pmp->iocom); /* * 
If no PFS's left drop the master hammer2_mount for the device. @@ -1047,23 +1034,6 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) kdmsg_msg_write(msg); } -/* - * Called with msglk held after queueing a new message, wakes up the - * transmit thread. We use an interlock thread to avoid unnecessary - * wakeups. - */ -void -hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom) -{ - hammer2_pfsmount_t *pmp = iocom->handle; - - if (pmp->iocom.msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { - atomic_clear_int(&pmp->iocom.msg_ctl, - KDMSG_CLUSTERCTL_SLEEPING); - wakeup(&pmp->iocom.msg_ctl); - } -} - static int hammer2_msg_lnk_rcvmsg(kdmsg_msg_t *msg) { -- 2.41.0