From: Matthew Dillon Date: Thu, 9 Aug 2012 01:32:16 +0000 (-0700) Subject: hammer2 - SPAN protocol work X-Git-Tag: v3.4.0rc~1036 X-Git-Url: http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/cf7158008b31a0a05335c5c3d5438de35f5740fc hammer2 - SPAN protocol work * Because we allow loops in the graph, the loss of a feeder node can result in a tail-chasing loop of SPAN updates with an ever-growing distance parameter. To deal with this, a spanning tree distance limit is required, beyond which no propagation occurs; this terminates the chase. The tail then catches up to the head and the node is finally removed from the spanning tree entirely. This fixes the propagation of spanning tree deletions, e.g. when we umount a HAMMER2 PFS. * Fix a state insertion bug. A structure was being inserted into the red-black tree before its required fields were initialized. This corrects a SPAN propagation fault. --- diff --git a/sbin/hammer2/msg.c b/sbin/hammer2/msg.c index c4480ba..1e33c80 100644 --- a/sbin/hammer2/msg.c +++ b/sbin/hammer2/msg.c @@ -39,6 +39,26 @@ static int hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg); static void hammer2_state_cleanuptx(hammer2_iocom_t *iocom, hammer2_msg_t *msg); /* + * Indexed messages are stored in a red-black tree indexed by their + * msgid. Only persistent messages are indexed. + */ +int +hammer2_state_cmp(hammer2_state_t *state1, hammer2_state_t *state2) +{ + if (state1->spanid < state2->spanid) + return(-1); + if (state1->spanid > state2->spanid) + return(1); + if (state1->msgid < state2->msgid) + return(-1); + if (state1->msgid > state2->msgid) + return(1); + return(0); +} + +RB_GENERATE(hammer2_state_tree, hammer2_state, rbnode, hammer2_state_cmp); + +/* * Initialize a low-level ioq */ void @@ -436,6 +456,9 @@ again: return (msg); } + if (ioq->error) + goto skip; + /* * Message read in-progress (msg is NULL at the moment). We don't * allocate a msg until we have its core header. 
@@ -768,6 +791,7 @@ again: * to update them when breaking out. */ if (ioq->error) { +skip: /* * An unrecoverable error causes all active receive * transactions to be terminated with a LNK_ERROR message. @@ -801,6 +825,7 @@ again: msg->any.head.error = ioq->error; pthread_mutex_lock(&iocom->mtx); + hammer2_iocom_drain(iocom); if ((state = RB_ROOT(&iocom->staterd_tree)) != NULL) { /* * Active remote transactions are still present. @@ -1201,7 +1226,9 @@ hammer2_msg_write(hammer2_iocom_t *iocom, hammer2_msg_t *msg, state->rxcmd = HAMMER2_MSGF_REPLY; state->func = func; state->any.any = data; + pthread_mutex_lock(&iocom->mtx); RB_INSERT(hammer2_state_tree, &iocom->statewr_tree, state); + pthread_mutex_unlock(&iocom->mtx); state->flags |= HAMMER2_STATE_INSERTED; msg->state = state; msg->any.head.msgid = state->msgid; @@ -1366,8 +1393,6 @@ hammer2_state_reply(hammer2_state_t *state, uint32_t error) * */ -RB_GENERATE(hammer2_state_tree, hammer2_state, rbnode, hammer2_state_cmp); - /* * Process state tracking for a message after reception, prior to * execution. 
@@ -1485,6 +1510,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "duplicate-trans %s\n", hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); break; } state = malloc(sizeof(*state)); @@ -1494,14 +1520,18 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) state->msg = msg; state->txcmd = HAMMER2_MSGF_REPLY; state->rxcmd = msg->any.head.cmd & ~HAMMER2_MSGF_DELETE; - pthread_mutex_lock(&iocom->mtx); - RB_INSERT(hammer2_state_tree, &iocom->staterd_tree, state); - pthread_mutex_unlock(&iocom->mtx); state->flags |= HAMMER2_STATE_INSERTED; state->msgid = msg->any.head.msgid; state->spanid = msg->any.head.spanid; msg->state = state; + pthread_mutex_lock(&iocom->mtx); + RB_INSERT(hammer2_state_tree, &iocom->staterd_tree, state); + pthread_mutex_unlock(&iocom->mtx); error = 0; + if (DebugOpt) { + fprintf(stderr, "create state %p id=%08x on iocom staterd %p\n", + state, (uint32_t)state->msgid, iocom); + } break; case HAMMER2_MSGF_DELETE: /* @@ -1515,6 +1545,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "missing-state %s\n", hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); } break; } @@ -1530,6 +1561,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "reused-state %s\n", hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); } break; } @@ -1559,6 +1591,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "no-state(r) %s\n", hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); break; } assert(((state->rxcmd ^ msg->any.head.cmd) & @@ -1578,6 +1611,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "no-state(r,d) %s\n", hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); } break; } @@ -1594,6 +1628,7 @@ hammer2_state_msgrx(hammer2_iocom_t *iocom, hammer2_msg_t *msg) fprintf(stderr, "reused-state(r,d) %s\n", 
hammer2_msg_str(msg)); error = HAMMER2_IOQ_ERROR_TRANS; + assert(0); } break; } @@ -1710,8 +1745,8 @@ hammer2_state_free(hammer2_state_t *state) char dummy; if (DebugOpt) { - fprintf(stderr, "terminate state id=%08x\n", - (uint32_t)state->msgid); + fprintf(stderr, "terminate state %p id=%08x\n", + state, (uint32_t)state->msgid); } assert(state->any.any == NULL); msg = state->msg; @@ -1737,24 +1772,6 @@ hammer2_state_free(hammer2_state_t *state) } } -/* - * Indexed messages are stored in a red-black tree indexed by their - * msgid. Only persistent messages are indexed. - */ -int -hammer2_state_cmp(hammer2_state_t *state1, hammer2_state_t *state2) -{ - if (state1->spanid < state2->spanid) - return(-1); - if (state1->spanid > state2->spanid) - return(1); - if (state1->msgid < state2->msgid) - return(-1); - if (state1->msgid > state2->msgid) - return(1); - return(0); -} - const char * hammer2_basecmd_str(uint32_t cmd) { diff --git a/sbin/hammer2/msg_lnk.c b/sbin/hammer2/msg_lnk.c index 7e868d7..98b3266 100644 --- a/sbin/hammer2/msg_lnk.c +++ b/sbin/hammer2/msg_lnk.c @@ -119,6 +119,12 @@ #include "hammer2.h" /* + * Maximum spanning tree distance. This has the practical effect of + * stopping tail-chasing closed loops when a feeder span is lost. + */ +#define HAMMER2_SPAN_MAXDIST 16 + +/* * RED-BLACK TREE DEFINITIONS * * We need to track: @@ -258,6 +264,10 @@ h2span_node_cmp(h2span_node_t *node1, h2span_node_t *node2) return(uuid_compare(&node1->pfs_fsid, &node2->pfs_fsid, NULL)); } +/* + * NOTE: Sort/subsort must match h2span_relay_cmp() under any given + * node. 
+ */ static int h2span_link_cmp(h2span_link_t *link1, h2span_link_t *link2) @@ -672,6 +682,9 @@ hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) RB_SCAN(h2span_relay_tree, &conn->tree, hammer2_relay_scan_cmp, hammer2_relay_scan_callback, &info); relay = info.relay; + info.relay = NULL; + if (relay) + assert(relay->link->node == node); if (DebugOpt > 8) fprintf(stderr, "relay scan for connection %p\n", conn); @@ -682,19 +695,35 @@ hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) */ RB_FOREACH(slink, h2span_link_tree, &node->tree) { /* - * PROPAGATE THE BEST RELAYS BY TRANSMITTING SPANs. - * - * Check for match against current best relay. + * PROPAGATE THE BEST LINKS OVER THE SPECIFIED CONNECTION. * - * A match failure means that the current best relay is not - * as good as the link, create a new relay for the link. + * Track relays while iterating the best links and construct + * missing relays when necessary. * * (If some prior better link was removed it would have also * removed the relay, so the relay can only match exactly or * be worst). */ - info.relay = relay; - if (relay == NULL || relay->link != slink) { + if (relay && relay->link == slink) { + /* + * Match, get the next relay to match against the + * next slink. + */ + relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); + if (--count == 0) + break; + } else if (slink->dist > HAMMER2_SPAN_MAXDIST) { + /* + * No match but span distance is too great, + * do not relay. This prevents endless closed + * loops with ever-incrementing distances when + * the seed span is lost in the graph. + */ + /* no code needed */ + } else { + /* + * No match, distance is ok, construct a new relay. 
+ */ hammer2_msg_t *msg; assert(relay == NULL || @@ -703,6 +732,9 @@ hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) relay->conn = conn; relay->link = slink; + RB_INSERT(h2span_relay_tree, &conn->tree, relay); + TAILQ_INSERT_TAIL(&slink->relayq, relay, entry); + msg = hammer2_msg_alloc(conn->state->iocom, 0, HAMMER2_LNK_SPAN | HAMMER2_MSGF_CREATE); @@ -713,21 +745,18 @@ hammer2_relay_scan_specific(h2span_node_t *node, h2span_connect_t *conn) hammer2_lnk_relay, relay, &relay->state); fprintf(stderr, - "RELAY SPAN ON CLS=%p NODE=%p FD %d state %p\n", - node->cls, node, + "RELAY SPAN ON CLS=%p NODE=%p DIST=%d " + "FD %d state %p\n", + node->cls, node, slink->dist, conn->state->iocom->sock_fd, relay->state); - RB_INSERT(h2span_relay_tree, &conn->tree, relay); - TAILQ_INSERT_TAIL(&slink->relayq, relay, entry); - } - - /* - * Iterate, figure out the next relay. - */ - relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); - if (--count == 0) { - break; - continue; + /* + * Match (created new relay), get the next relay to + * match against the next slink. + */ + relay = RB_NEXT(h2span_relay_tree, &conn->tree, relay); + if (--count == 0) + break; } } @@ -748,10 +777,10 @@ void hammer2_relay_delete(h2span_relay_t *relay) { fprintf(stderr, - "RELAY DELETE ON CLS=%p NODE=%p FD %d STATE %p\n", + "RELAY DELETE ON CLS=%p NODE=%p DIST=%d FD %d STATE %p\n", relay->link->node->cls, relay->link->node, + relay->link->dist, relay->conn->state->iocom->sock_fd, relay->state); - fprintf(stderr, "RELAY TX %08x RX %08x\n", relay->state->txcmd, relay->state->rxcmd); RB_REMOVE(h2span_relay_tree, &relay->conn->tree, relay); TAILQ_REMOVE(&relay->link->relayq, relay, entry);