From bfc3a7b1a6cc1c380d88b1a095076132618294d7 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 8 Jun 2012 22:03:48 -0700
Subject: [PATCH] hammer2 - Wire-up the kernel<->userland messaging pipe

This commit starts coding up the cluster controller messaging
infrastructure. The cluster controller is a userland program typically
running on the same machine (but doesn't have to be). The controller will
be able to act in several capacities ranging from simple remote mounts with
no local storage to mirroring setups, master/slave setups, and ultimately
quorum setups.

Since communication is over a socket it will eventually be possible to
implement a diskless hammer2 root mount without the need for a local
controller.

The VFS only talks over one socket, so in quorum or multi-connection setups
the local cluster controller will deal with the complexity of managing
multiple connections and the hammer2 VFS messaging interface remains simple.

The hammer2 VFS will also use this interface to request cache state grants
and, being a two-way protocol, the other end can request cache state
invalidations or downgrades.

* mount_hammer2 now starts 'hammer2 service', connects to it via a socket,
  and passes the socket descriptor to mount().

* The hammer2 VFS now refs the passed-in file pointer and starts a reader
  and writer thread to manage it. The code does not yet process actual
  messages.
--- sbin/mount_hammer2/mount_hammer2.c | 77 ++++++++++++++++++++++- sys/vfs/hammer2/hammer2.h | 9 ++- sys/vfs/hammer2/hammer2_mount.h | 2 +- sys/vfs/hammer2/hammer2_vfsops.c | 98 +++++++++++++++++++++++++++++- 4 files changed, 180 insertions(+), 6 deletions(-) diff --git a/sbin/mount_hammer2/mount_hammer2.c b/sbin/mount_hammer2/mount_hammer2.c index 7a2688ae71..bf2ece2fba 100644 --- a/sbin/mount_hammer2/mount_hammer2.c +++ b/sbin/mount_hammer2/mount_hammer2.c @@ -34,11 +34,16 @@ */ #include #include +#include +#include #include #include #include #include +#include + +static int cluster_connect(const char *volume); /* * Usage: mount_hammer2 [volume] [mtpt] @@ -64,11 +69,81 @@ main(int argc, char *argv[]) exit(1); } + /* + * Connect to the cluster controller. This handles both remote + * mounts and device cache/master/slave mounts. + * + * When doing remote mounts that are allowed to run in the background + * the mount program will fork, detach, print a message, and exit(0) + * the originator while retrying in the background. + */ + info.cluster_fd = cluster_connect(argv[1]); + if (info.cluster_fd < 0) { + fprintf(stderr, + "hammer2_mount: cluster_connect(%s) failed\n", + argv[1]); + exit(1); + } + + /* + * Try to mount it + */ info.volume = argv[1]; info.hflags = 0; mountpt = argv[2]; error = mount(vfc.vfc_name, mountpt, mount_flags, &info); - if (error) + if (error) { perror("mount: "); + exit(1); + } + + /* + * XXX fork a backgrounded reconnector process to handle connection + * failures. XXX + */ + + return (0); +} + +/* + * Connect to the cluster controller. We can connect to a local or remote + * cluster controller, depending. For a multi-node cluster we always want + * to connect to the local controller and let it maintain the connections + * to the multiple remote nodes. 
+ */ +static +int +cluster_connect(const char *volume __unused) +{ + struct sockaddr_in lsin; + int fd; + + /* + * This starts the hammer2 service if it isn't already running, + * so we can connect to it. + */ + system("/sbin/hammer2 -q service"); + + /* + * Connect us to the service but leave the rest to the kernel. + * If the connection is lost during the mount + */ + if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + return(-1); + } + bzero(&lsin, sizeof(lsin)); + lsin.sin_family = AF_INET; + lsin.sin_addr.s_addr = 0; + lsin.sin_port = htons(HAMMER2_LISTEN_PORT); + + if (connect(fd, (struct sockaddr *)&lsin, sizeof(lsin)) < 0) { + close(fd); + fprintf(stderr, "mount_hammer2: unable to connect to " + "cluster controller\n"); + return(-1); + } + + return(fd); } diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 37449fe5eb..3a1cc442af 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -293,7 +293,8 @@ struct hammer2_mount { struct lock voldatalk; /* lockmgr lock */ hammer2_volume_data_t voldata; - hammer2_freecache_t freecache[HAMMER2_FREECACHE_TYPES][HAMMER2_MAX_RADIX+1]; + hammer2_freecache_t freecache[HAMMER2_FREECACHE_TYPES] + [HAMMER2_MAX_RADIX+1]; }; typedef struct hammer2_mount hammer2_mount_t; @@ -309,10 +310,16 @@ struct hammer2_pfsmount { ccms_domain_t ccms_dom; struct netexport export; /* nfs export */ int ronly; /* read-only mount */ + struct file *msg_fp; /* cluster pipe->userland */ + thread_t msgrd_td; /* cluster thread */ + thread_t msgwr_td; /* cluster thread */ + int msg_ctl; /* wakeup flags */ }; typedef struct hammer2_pfsmount hammer2_pfsmount_t; +#define HAMMER2_CLUSTERCTL_KILL 0x0001 + #if defined(_KERNEL) MALLOC_DECLARE(M_HAMMER2); diff --git a/sys/vfs/hammer2/hammer2_mount.h b/sys/vfs/hammer2/hammer2_mount.h index ecedae32be..d10ae8b9da 100644 --- a/sys/vfs/hammer2/hammer2_mount.h +++ b/sys/vfs/hammer2/hammer2_mount.h @@ -45,7 +45,7 @@ struct hammer2_mount_info { const char 
*volume; int hflags; /* extended hammer mount flags */ - int unused01; + int cluster_fd; /* cluster management pipe/socket */ char reserved1[112]; }; diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 8278642fe5..cad6538854 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -42,10 +42,12 @@ #include #include #include +#include #include "hammer2.h" #include "hammer2_disk.h" #include "hammer2_mount.h" +#include "hammer2_network.h" struct hammer2_sync_info { int error; @@ -135,6 +137,9 @@ static int hammer2_install_volume_header(hammer2_mount_t *hmp); static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data); static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); +static void hammer2_cluster_thread_rd(void *arg); +static void hammer2_cluster_thread_wr(void *arg); + /* * HAMMER2 vfs operations. */ @@ -206,7 +211,7 @@ hammer2_vfs_init(struct vfsconf *conf) static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, - struct ucred *cred) + struct ucred *cred) { struct hammer2_mount_info info; hammer2_pfsmount_t *pmp; @@ -238,6 +243,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, /* * Root mount */ + bzero(&info, sizeof(info)); + info.cluster_fd = -1; return (EOPNOTSUPP); } else { /* @@ -274,9 +281,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, } /* - * New non-root mount + * PFS mount + * + * Lookup name and verify it refers to a block device. */ - /* Lookup name and verify it refers to a block device */ error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); if (error == 0) error = nlookup(&nd); @@ -462,6 +470,24 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, kprintf("iroot %p\n", pmp->iroot); + /* + * Ref the cluster management messaging descriptor. The mount + * program deals with the other end of the communications pipe. 
+ */ + pmp->msg_fp = holdfp(curproc->p_fd, info.cluster_fd, -1); + if (pmp->msg_fp == NULL) { + kprintf("hammer2_mount: bad cluster_fd!\n"); + hammer2_vfs_unmount(mp, MNT_FORCE); + return EBADF; + } + lwkt_create(hammer2_cluster_thread_rd, pmp, &pmp->msgrd_td, + NULL, 0, -1, "hammer2-msgrd"); + lwkt_create(hammer2_cluster_thread_wr, pmp, &pmp->msgwr_td, + NULL, 0, -1, "hammer2-msgwr"); + + /* + * Finish setup + */ vfs_getnewfsid(mp); vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); @@ -474,6 +500,9 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, sizeof(mp->mnt_stat.f_mntonname) - 1, &size); + /* + * Initial statfs to prime mnt_stat. + */ hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); return 0; @@ -557,6 +586,27 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) pmp->rchain = NULL; } ccms_domain_uninit(&pmp->ccms_dom); + + /* + * Ask the cluster controller to go away + */ + atomic_set_int(&pmp->msg_ctl, HAMMER2_CLUSTERCTL_KILL); + while (pmp->msgrd_td || pmp->msgwr_td) { + wakeup(&pmp->msg_ctl); + tsleep(pmp, 0, "clstrkl", hz); + } + + /* + * Drop communications descriptor + */ + if (pmp->msg_fp) { + fdrop(pmp->msg_fp); + pmp->msg_fp = NULL; + } + + /* + * If no PFS's left drop the master hammer2_mount for the device. + */ if (hmp->pmp_count == 0) { if (hmp->schain) { KKASSERT(hmp->schain->refs == 1); @@ -945,3 +995,45 @@ hammer2_install_volume_header(hammer2_mount_t *hmp) return (error); } +/* + * Cluster controller thread. Perform messaging functions. We have one + * thread for the reader and one for the writer. The writer handles + * shutdown requests (which should break the reader thread). 
+ */ +static +void +hammer2_cluster_thread_rd(void *arg) +{ + hammer2_pfsmount_t *pmp = arg; + hammer2_any_t any; + int error; + + while ((pmp->msg_ctl & HAMMER2_CLUSTERCTL_KILL) == 0) { + error = fp_read(pmp->msg_fp, + any.buf, sizeof(hammer2_msg_hdr_t), + NULL, 1, UIO_SYSSPACE); + kprintf("fp_read %d\n", error); + if (error) + break; + } + pmp->msgrd_td = NULL; + /* pmp can be ripped out from under us at this point */ + wakeup(pmp); + lwkt_exit(); +} + +static +void +hammer2_cluster_thread_wr(void *arg) +{ + hammer2_pfsmount_t *pmp = arg; + + while ((pmp->msg_ctl & HAMMER2_CLUSTERCTL_KILL) == 0) { + tsleep(&pmp->msg_ctl, 0, "msgwr", hz); + } + fp_shutdown(pmp->msg_fp, SHUT_RDWR); + pmp->msgwr_td = NULL; + /* pmp can be ripped out from under us at this point */ + wakeup(pmp); + lwkt_exit(); +} -- 2.41.0