From 2063f4d7f86db92463c7a0f971721a9f94d1ef14 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 11 Aug 2012 19:32:03 -0700 Subject: [PATCH] hammer2 - Add peer_type field to LNK_CONN and LNK_SPAN * Adds a peer_type field allowing connections to identify what they are (i.e. a HAMMER2 mount, a CLUSTER controller, a BLOCK device controller). Rename the pfs_type field in the volume header, which never made much sense, to peer_type, which now does make sense though for the moment the only value it can have is HAMMER2_PEER_HAMMER2. * Filter HAMMER2_PEER_HAMMER2 peer types by pfs_clid to reduce unnecessary LNK_SPAN traffic being transmitted to a HAMMER2 mount (in the kernel). * Minor cleanup. --- sbin/hammer2/cmd_service.c | 10 +- sbin/hammer2/msg_lnk.c | 214 ++++++++++++++++------------- sbin/hammer2/network.h | 6 +- sbin/newfs_hammer2/newfs_hammer2.c | 2 + sys/vfs/hammer2/hammer2_disk.h | 14 +- sys/vfs/hammer2/hammer2_network.h | 31 ++++- sys/vfs/hammer2/hammer2_vfsops.c | 3 + 7 files changed, 172 insertions(+), 108 deletions(-) diff --git a/sbin/hammer2/cmd_service.c b/sbin/hammer2/cmd_service.c index 2819b052f4..0fd3ea70c2 100644 --- a/sbin/hammer2/cmd_service.c +++ b/sbin/hammer2/cmd_service.c @@ -200,6 +200,7 @@ master_reconnect(const char *mntpt) } if (pipe(pipefds) < 0) { fprintf(stderr, "reconnect %s: pipe() failed\n", mntpt); + close(fd); return; } bzero(&recls, sizeof(recls)); @@ -212,6 +213,7 @@ master_reconnect(const char *mntpt) return; } close(pipefds[0]); + close(fd); info = malloc(sizeof(*info)); bzero(info, sizeof(*info)); @@ -272,12 +274,14 @@ master_auth_signal(hammer2_router_t *router) * Transmit LNK_CONN, enabling the SPAN protocol if both sides * agree. * - * XXX put additional authentication states here + * XXX put additional authentication states here? 
*/ msg = hammer2_msg_alloc(router, 0, HAMMER2_LNK_CONN | - HAMMER2_MSGF_CREATE, + HAMMER2_MSGF_CREATE, master_auth_conn_rx, NULL); - snprintf(msg->any.lnk_conn.label, sizeof(msg->any.lnk_conn.label), "*"); + msg->any.lnk_conn.peer_mask = (uint64_t)-1; + msg->any.lnk_conn.peer_type = HAMMER2_PEER_CLUSTER; + hammer2_msg_write(msg); hammer2_router_restate(router, diff --git a/sbin/hammer2/msg_lnk.c b/sbin/hammer2/msg_lnk.c index cb5afb721f..9e8d19569c 100644 --- a/sbin/hammer2/msg_lnk.c +++ b/sbin/hammer2/msg_lnk.c @@ -141,7 +141,7 @@ * outgoing LNK_SPAN transactions on each of our connections representing * the aggregated state. * - * h2span_connect - list of iocom connections who wish to receive SPAN + * h2span_conn - list of iocom connections who wish to receive SPAN * propagation from other connections. Might contain * a filter string. Only iocom's with an open * LNK_CONN transactions are applicable for SPAN @@ -186,7 +186,7 @@ struct h2span_link; struct h2span_relay; TAILQ_HEAD(h2span_media_queue, h2span_media); -TAILQ_HEAD(h2span_connect_queue, h2span_connect); +TAILQ_HEAD(h2span_conn_queue, h2span_conn); TAILQ_HEAD(h2span_relay_queue, h2span_relay); RB_HEAD(h2span_cluster_tree, h2span_cluster); @@ -224,8 +224,8 @@ typedef struct h2span_media_config h2span_media_config_t; * (may contain filter). Typically one for each mount and several may * share the same media. */ -struct h2span_connect { - TAILQ_ENTRY(h2span_connect) entry; +struct h2span_conn { + TAILQ_ENTRY(h2span_conn) entry; struct h2span_relay_tree tree; struct h2span_media *media; hammer2_state_t *state; @@ -267,7 +267,7 @@ struct h2span_link { * In many respects this is the core of the protocol... actually figuring * out what LNK_SPANs to relay. 
The spanid used for relaying is the * address of the 'state' structure, which is why h2span_relay has to - * be entered into a RB-TREE based at h2span_connect (so we can look + * be entered into a RB-TREE based at h2span_conn (so we can look * up the spanid to validate it). * * NOTE: Messages can be received via the LNK_SPAN transaction the @@ -279,9 +279,9 @@ struct h2span_link { * transaction the relay is holding open. */ struct h2span_relay { - RB_ENTRY(h2span_relay) rbnode; /* from h2span_connect */ + RB_ENTRY(h2span_relay) rbnode; /* from h2span_conn */ TAILQ_ENTRY(h2span_relay) entry; /* from link */ - struct h2span_connect *conn; + struct h2span_conn *conn; hammer2_state_t *state; /* transmitted LNK_SPAN */ struct h2span_link *link; /* LNK_SPAN being relayed */ struct hammer2_router *router;/* route out this relay */ @@ -289,7 +289,7 @@ struct h2span_relay { typedef struct h2span_media h2span_media_t; -typedef struct h2span_connect h2span_connect_t; +typedef struct h2span_conn h2span_conn_t; typedef struct h2span_cluster h2span_cluster_t; typedef struct h2span_node h2span_node_t; typedef struct h2span_link h2span_link_t; @@ -396,13 +396,13 @@ RB_GENERATE_STATIC(h2span_relay_tree, h2span_relay, */ static pthread_mutex_t cluster_mtx; static struct h2span_cluster_tree cluster_tree = RB_INITIALIZER(cluster_tree); -static struct h2span_connect_queue connq = TAILQ_HEAD_INITIALIZER(connq); +static struct h2span_conn_queue connq = TAILQ_HEAD_INITIALIZER(connq); static struct h2span_media_queue mediaq = TAILQ_HEAD_INITIALIZER(mediaq); static void hammer2_lnk_span(hammer2_msg_t *msg); static void hammer2_lnk_conn(hammer2_msg_t *msg); static void hammer2_lnk_relay(hammer2_msg_t *msg); -static void hammer2_relay_scan(h2span_connect_t *conn, h2span_node_t *node); +static void hammer2_relay_scan(h2span_conn_t *conn, h2span_node_t *node); static void hammer2_relay_delete(h2span_relay_t *relay); static void *hammer2_volconf_thread(void *info); @@ -448,7 +448,7 @@ 
hammer2_lnk_conn(hammer2_msg_t *msg) hammer2_state_t *state = msg->state; h2span_media_t *media; h2span_media_config_t *conf; - h2span_connect_t *conn; + h2span_conn_t *conn; h2span_relay_t *relay; char *alloc = NULL; int i; @@ -459,7 +459,7 @@ hammer2_lnk_conn(hammer2_msg_t *msg) case HAMMER2_LNK_CONN | HAMMER2_MSGF_CREATE: case HAMMER2_LNK_CONN | HAMMER2_MSGF_CREATE | HAMMER2_MSGF_DELETE: /* - * On transaction start we allocate a new h2span_connect and + * On transaction start we allocate a new h2span_conn and * acknowledge the request, leaving the transaction open. * We then relay priority-selected SPANs. */ @@ -505,7 +505,7 @@ hammer2_lnk_conn(hammer2_msg_t *msg) case HAMMER2_LNK_ERROR | HAMMER2_MSGF_DELETE: deleteconn: /* - * On transaction terminate we clean out our h2span_connect + * On transaction terminate we clean out our h2span_conn * and acknowledge the request, closing the transaction. */ fprintf(stderr, "LNK_CONN: Terminated\n"); @@ -805,10 +805,10 @@ hammer2_lnk_relay(hammer2_msg_t *msg) * Called with cluster_mtx held. */ static void hammer2_relay_scan_specific(h2span_node_t *node, - h2span_connect_t *conn); + h2span_conn_t *conn); static void -hammer2_relay_scan(h2span_connect_t *conn, h2span_node_t *node) +hammer2_relay_scan(h2span_conn_t *conn, h2span_node_t *node) { h2span_cluster_t *cls; @@ -884,13 +884,16 @@ hammer2_relay_scan_callback(h2span_relay_t *relay, void *arg) } static void -hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) +hammer2_relay_scan_specific(h2span_node_t *node, h2span_conn_t *conn) { struct relay_scan_info info; h2span_relay_t *relay; h2span_relay_t *next_relay; h2span_link_t *slink; + hammer2_lnk_conn_t *lconn; + hammer2_msg_t *msg; int count = 2; + uint8_t peer_type; info.node = node; info.relay = NULL; @@ -912,99 +915,118 @@ hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) /* * Iterate the node's links (received SPANs) in distance order, * lowest (best) dist first. 
+ * + * PROPAGATE THE BEST LINKS OVER THE SPECIFIED CONNECTION. + * + * Track relays while iterating the best links and construct + * missing relays when necessary. + * + * (If some prior better link was removed it would have also + * removed the relay, so the relay can only match exactly or + * be worse). */ - /* fprintf(stderr, "LOOP\n"); */ RB_FOREACH(slink, h2span_link_tree, &node->tree) { /* - fprintf(stderr, "SLINK %p RELAY %p(%p)\n", - slink, relay, relay ? relay->link : NULL); - */ - /* - * PROPAGATE THE BEST LINKS OVER THE SPECIFIED CONNECTION. - * - * Track relays while iterating the best links and construct - * missing relays when necessary. - * - * (If some prior better link was removed it would have also - * removed the relay, so the relay can only match exactly or - * be worse). + * Match, relay already in-place, get the next + * relay to match against the next slink. */ if (relay && relay->link == slink) { - /* - * Match, relay already in-place, get the next - * relay to match against the next slink. - */ relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); if (--count == 0) break; - } else if (slink->dist > HAMMER2_SPAN_MAXDIST) { - /* - * No match but span distance is too great, - * do not relay. This prevents endless closed - * loops with ever-incrementing distances when - * the seed span is lost in the graph. - * - * All later spans will also be too far away so - * we can break out of the loop. - */ + continue; + } + + /* + * We might want this SLINK, if it passes our filters. + * + * The spanning tree can cause closed loops so we have + * to limit slink->dist. + */ + if (slink->dist > HAMMER2_SPAN_MAXDIST) break; - } else if (slink->state->iocom == conn->state->iocom) { - /* - * No match but we would transmit a LNK_SPAN - * out the same connection it came in on, which - * can be trivially optimized out. - */ + + /* + * Don't bother transmitting a LNK_SPAN out the same + * connection it came in on. Trivial optimization. 
+ */ + if (slink->state->iocom == conn->state->iocom) break; - } else { - /* - * No match, distance is ok, construct a new relay. - * (slink is better than relay). - */ - hammer2_msg_t *msg; - - assert(relay == NULL || - relay->link->node != slink->node || - relay->link->dist >= slink->dist); - relay = hammer2_alloc(sizeof(*relay)); - relay->conn = conn; - relay->link = slink; - - msg = hammer2_msg_alloc(conn->state->iocom->router, 0, - HAMMER2_LNK_SPAN | - HAMMER2_MSGF_CREATE, - hammer2_lnk_relay, relay); - relay->state = msg->state; - relay->router = hammer2_router_alloc(); - relay->router->iocom = relay->state->iocom; - relay->router->relay = relay; - relay->router->target = relay->state->msgid; - - msg->any.lnk_span = slink->state->msg->any.lnk_span; - msg->any.lnk_span.dist = slink->dist + 1; - - hammer2_router_connect(relay->router); - - RB_INSERT(h2span_relay_tree, &conn->tree, relay); - TAILQ_INSERT_TAIL(&slink->relayq, relay, entry); - - hammer2_msg_write(msg); - - fprintf(stderr, - "RELAY SPAN %p RELAY %p ON CLS=%p NODE=%p DIST=%d " - "FD %d state %p\n", - slink, - relay, - node->cls, node, slink->dist, - conn->state->iocom->sock_fd, relay->state); - /* - * Match (created new relay), get the next relay to - * match against the next slink. - */ - relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); - if (--count == 0) + /* + * NOTE ON FILTERS: The protocol spec allows non-requested + * SPANs to be transmitted, the other end is expected to + * leave their transactions open but otherwise ignore them. + * + * Don't bother transmitting if the remote connection + * is not accepting this SPAN's peer_type. + */ + peer_type = slink->state->msg->any.lnk_span.peer_type; + lconn = &conn->state->msg->any.lnk_conn; + if (((1LLU << peer_type) & lconn->peer_mask) == 0) + break; + + /* + * Filter based on pfs_clid or label (XXX). 
This typically + * reduces the amount of SPAN traffic that a mount end-point + * sees by only passing along SPANs related to the cluster id + * (that is, it will see all PFS's associated with the + * particular cluster it represents). + */ + if (peer_type == lconn->peer_type && + peer_type == HAMMER2_PEER_HAMMER2) { + if (!uuid_is_nil(&slink->node->cls->pfs_clid, NULL) && + uuid_compare(&slink->node->cls->pfs_clid, + &lconn->pfs_clid, NULL) != 0) { break; + } } + + /* + * Ok, we've accepted this SPAN for relaying. + */ + assert(relay == NULL || + relay->link->node != slink->node || + relay->link->dist >= slink->dist); + relay = hammer2_alloc(sizeof(*relay)); + relay->conn = conn; + relay->link = slink; + + msg = hammer2_msg_alloc(conn->state->iocom->router, 0, + HAMMER2_LNK_SPAN | + HAMMER2_MSGF_CREATE, + hammer2_lnk_relay, relay); + relay->state = msg->state; + relay->router = hammer2_router_alloc(); + relay->router->iocom = relay->state->iocom; + relay->router->relay = relay; + relay->router->target = relay->state->msgid; + + msg->any.lnk_span = slink->state->msg->any.lnk_span; + msg->any.lnk_span.dist = slink->dist + 1; + + hammer2_router_connect(relay->router); + + RB_INSERT(h2span_relay_tree, &conn->tree, relay); + TAILQ_INSERT_TAIL(&slink->relayq, relay, entry); + + hammer2_msg_write(msg); + + fprintf(stderr, + "RELAY SPAN %p RELAY %p ON CLS=%p NODE=%p DIST=%d " + "FD %d state %p\n", + slink, + relay, + node->cls, node, slink->dist, + conn->state->iocom->sock_fd, relay->state); + + /* + * Match (created new relay), get the next relay to + * match against the next slink. 
+ */ + relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); + if (--count == 0) + break; } /* diff --git a/sbin/hammer2/network.h b/sbin/hammer2/network.h index 565d38759a..9ca48c405c 100644 --- a/sbin/hammer2/network.h +++ b/sbin/hammer2/network.h @@ -129,7 +129,7 @@ RB_HEAD(hammer2_router_tree, hammer2_router); struct h2span_link; struct h2span_relay; -struct h2span_connect; +struct h2span_conn; struct hammer2_state { RB_ENTRY(hammer2_state) rbnode; /* indexed by msgid */ @@ -145,7 +145,7 @@ struct hammer2_state { union { void *any; struct h2span_link *link; - struct h2span_connect *conn; + struct h2span_conn *conn; struct h2span_relay *relay; } any; }; @@ -172,7 +172,7 @@ int hammer2_state_cmp(hammer2_state_t *state1, hammer2_state_t *state2); RB_PROTOTYPE(hammer2_state_tree, hammer2_state, rbnode, hammer2_state_cmp); /* - * hammer2_ioq - An embedded component of hammer2_connect, holds state + * hammer2_ioq - An embedded component of hammer2_conn, holds state * for the buffering and parsing of incoming and outgoing messages. 
* * cdx - beg - processed buffer data, encrypted or decrypted diff --git a/sbin/newfs_hammer2/newfs_hammer2.c b/sbin/newfs_hammer2/newfs_hammer2.c index 66c757fde9..322d0b3c79 100644 --- a/sbin/newfs_hammer2/newfs_hammer2.c +++ b/sbin/newfs_hammer2/newfs_hammer2.c @@ -622,6 +622,8 @@ format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space) vol->fsid = Hammer2_FSId; vol->fstype = Hammer2_FSType; + vol->peer_type = HAMMER2_PEER_HAMMER2; /* LNK_CONN identification */ + vol->allocator_size = free_space; vol->allocator_free = free_space; vol->allocator_beg = alloc_base; diff --git a/sys/vfs/hammer2/hammer2_disk.h b/sys/vfs/hammer2/hammer2_disk.h index 37f7c80d8d..af13fd36b9 100644 --- a/sys/vfs/hammer2/hammer2_disk.h +++ b/sys/vfs/hammer2/hammer2_disk.h @@ -507,6 +507,18 @@ typedef struct hammer2_inode_data hammer2_inode_data_t; #define HAMMER2_CHECK_NONE 0 #define HAMMER2_CHECK_ICRC 1 +/* + * PEER types identify connections and help cluster controller filter + * out unwanted SPANs. + */ +#define HAMMER2_PEER_NONE 0 +#define HAMMER2_PEER_CLUSTER 1 /* a cluster controller */ +#define HAMMER2_PEER_BLOCK 2 /* block devices */ +#define HAMMER2_PEER_HAMMER2 3 /* hammer2-mounted volumes */ + +/* + * PFS types identify a PFS on media and in LNK_SPAN messages. 
+ */ #define HAMMER2_PFSTYPE_NONE 0 #define HAMMER2_PFSTYPE_ADMIN 1 #define HAMMER2_PFSTYPE_CLIENT 2 @@ -703,7 +715,7 @@ struct hammer2_volume_data { uint32_t flags; /* 0034 */ uint8_t copyid; /* 0038 copyid of phys vol */ uint8_t freemap_version; /* 0039 freemap algorithm */ - uint8_t pfs_type; /* 003A local media pfstype */ + uint8_t peer_type; /* 003A HAMMER2_PEER_xxx */ uint8_t reserved003B; /* 003B */ uint32_t reserved003C; /* 003C */ diff --git a/sys/vfs/hammer2/hammer2_network.h b/sys/vfs/hammer2/hammer2_network.h index 533e1152a3..8eee05d0c4 100644 --- a/sys/vfs/hammer2/hammer2_network.h +++ b/sys/vfs/hammer2/hammer2_network.h @@ -373,18 +373,39 @@ struct hammer2_lnk_auth { char dummy[64]; }; +/* + * LNK_CONN identifies a streaming connection into the cluster. The other + * fields serve as a filter when supported for a particular peer and are + * not necessarily all used. + * + * peer_mask serves to filter the SPANs we receive by peer. A cluster + * controller typically sets this to (uint64_t)-1, a block devfs + * interface might set it to 1 << HAMMER2_PEER_BLOCK, and a hammer2 + * mount might set it to 1 << HAMMER2_PEER_HAMMER2. + * + * mediaid allows multiple (e.g. HAMMER2) connections belonging to the same + * media, in terms of LNK_VOLCONF updates. + * + * pfs_clid, pfs_fsid, pfs_type, and label are peer-specific and must be + * left empty (zero-fill) if not supported by a particular peer. 
+ * + * HAMMER2_PEER_CLUSTER filter: none + * HAMMER2_PEER_BLOCK filter: label + * HAMMER2_PEER_HAMMER2 filter: pfs_clid if not empty, and label + */ struct hammer2_lnk_conn { hammer2_msg_hdr_t head; uuid_t mediaid; /* media configuration id */ uuid_t pfs_clid; /* rendezvous pfs uuid */ uuid_t pfs_fsid; /* unique pfs uuid */ - uint8_t pfs_type; /* peer type */ - uint8_t reserved01; + uint64_t peer_mask; /* PEER mask for SPAN filtering */ + uint8_t peer_type; /* see HAMMER2_PEER_xxx */ + uint8_t pfs_type; /* pfs type */ uint16_t proto_version; /* high level protocol support */ uint32_t status; /* status flags */ uint8_t reserved02[8]; int32_t dist; /* span distance */ - uint32_t reserved03[15]; + uint32_t reserved03[14]; char label[256]; /* PFS label (can be wildcard) */ }; @@ -436,8 +457,8 @@ struct hammer2_lnk_span { hammer2_msg_hdr_t head; uuid_t pfs_clid; /* rendezvous pfs uuid */ uuid_t pfs_fsid; /* unique pfs uuid */ - uint8_t pfs_type; /* peer type */ - uint8_t reserved01; + uint8_t pfs_type; /* PFS type */ + uint8_t peer_type; /* PEER type */ uint16_t proto_version; /* high level protocol support */ uint32_t status; /* status flags */ uint8_t reserved02[8]; diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 21c8652df0..7046890e68 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -1241,6 +1241,8 @@ hammer2_cluster_thread_wr(void *arg) msg->any.lnk_conn.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; msg->any.lnk_conn.pfs_type = pmp->iroot->ip_data.pfs_type; msg->any.lnk_conn.proto_version = HAMMER2_SPAN_PROTO_1; + msg->any.lnk_conn.peer_type = pmp->hmp->voldata.peer_type; + msg->any.lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2; name_len = pmp->iroot->ip_data.name_len; if (name_len >= sizeof(msg->any.lnk_conn.label)) name_len = sizeof(msg->any.lnk_conn.label) - 1; @@ -1537,6 +1539,7 @@ hammer2_msg_conn_reply(hammer2_state_t *state, hammer2_msg_t *msg) rmsg->any.lnk_span.pfs_clid = 
pmp->iroot->ip_data.pfs_clid; rmsg->any.lnk_span.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; rmsg->any.lnk_span.pfs_type = pmp->iroot->ip_data.pfs_type; + rmsg->any.lnk_span.peer_type = pmp->hmp->voldata.peer_type; rmsg->any.lnk_span.proto_version = HAMMER2_SPAN_PROTO_1; name_len = pmp->iroot->ip_data.name_len; if (name_len >= sizeof(rmsg->any.lnk_span.label)) -- 2.41.0