/*
 * hammer2 - Early messaging infrastructure
 * [dragonfly.git] / sbin / hammer2 / network.h
 *
 * (Repository-browser listing residue: columns "Commit / Line / Data",
 * commit 9ab15106, MD.)
 */
/*
 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

36/***************************************************************************
37 * LOW LEVEL MESSAGING *
38 ***************************************************************************
39 *
40 * hammer2_msg - A standalone copy of a message, typically referenced by
41 * or embedded in other structures, or used with I/O queues.
42 *
43 * These structures are strictly temporary, so they do not have to be
44 * particularly optimized for size. All possible message headers are
45 * directly embedded (any), and the message may contain a reference
46 * to allocated auxillary data. The structure is recycled quite often
47 * by a connection.
48 *
49 * This structure is typically not used for storing persistent message
50 * state (see hammer2_pmsg for that).
51 */
52struct hammer2_msg {
53 TAILQ_ENTRY(hammer2_msg) entry; /* queue */
54 char *aux_data; /* aux-data if any */
55 int aux_size;
56 int flags;
57 hammer2_any_t any; /* raw extended msg header */
58};
59
typedef struct hammer2_msg hammer2_msg_t;

/*
 * Tail-queue head type for lists of hammer2_msg structures (the structure
 * carries a matching TAILQ_ENTRY named 'entry').
 */
TAILQ_HEAD(hammer2_msg_queue, hammer2_msg);
typedef struct hammer2_msg_queue hammer2_msg_queue_t;

/*
 * Message flag bit.  NOTE(review): presumably stored in hammer2_msg.flags
 * to mark a message whose header arrived byte-swapped -- confirm against
 * the code that sets it.
 */
#define HAMMER2_MSGX_BSWAPPED	0x0001

67/*
68 * hammer2_ioq - An embedded component of hammer2_connect, holds state
69 * for the buffering and parsing of incoming and outgoing messages.
70 */
71struct hammer2_ioq {
72 enum { HAMMER2_MSGQ_STATE_HEADER1,
73 HAMMER2_MSGQ_STATE_HEADER2,
74 HAMMER2_MSGQ_STATE_AUXDATA1,
75 HAMMER2_MSGQ_STATE_AUXDATA2,
76 HAMMER2_MSGQ_STATE_ERROR } state;
77 int fifo_beg; /* buffered data */
78 int fifo_end;
79 int hbytes; /* header size */
80 int abytes; /* aux_data size */
81 int error;
82 int seq; /* salt sequencer */
83 int msgcount;
84 hammer2_msg_t *msg;
85 hammer2_msg_queue_t msgq;
86};
87
typedef struct hammer2_ioq hammer2_ioq_t;

/*
 * I/O queue error codes.  NOTE(review): presumably the values stored in
 * hammer2_ioq.error -- confirm against the code that assigns that field.
 */
#define HAMMER2_IOQ_ERROR_SYNC		1	/* bad magic / out of sync */
#define HAMMER2_IOQ_ERROR_EOF		2	/* unexpected EOF */
#define HAMMER2_IOQ_ERROR_SOCK		3	/* read() error on socket */
#define HAMMER2_IOQ_ERROR_FIELD		4	/* invalid field */
#define HAMMER2_IOQ_ERROR_HCRC		5	/* core header crc bad */
#define HAMMER2_IOQ_ERROR_XCRC		6	/* ext header crc bad */
#define HAMMER2_IOQ_ERROR_ACRC		7	/* aux data crc bad */
#define HAMMER2_IOQ_ERROR_STATE		8	/* bad state */

/* NOTE(review): presumably the iovec limit for one I/O call -- confirm */
#define HAMMER2_IOQ_MAXIOVEC		16

101/*
102 * hammer2_iocom - governs a messaging stream connection
103 */
104struct hammer2_iocom {
105 hammer2_ioq_t ioq_rx;
106 hammer2_ioq_t ioq_tx;
107 hammer2_msg_queue_t freeq; /* free msgs hdr only */
108 hammer2_msg_queue_t freeq_aux; /* free msgs w/aux_data */
109 void (*recvmsg_callback)(struct hammer2_iocom *);
110 void (*sendmsg_callback)(struct hammer2_iocom *);
111 void (*altmsg_callback)(struct hammer2_iocom *);
112 int sock_fd; /* comm socket or pipe */
113 int alt_fd; /* thread signal, tty, etc */
114 int flags;
115 char rxbuf[HAMMER2_MSGBUF_SIZE]; /* for ioq_rx only */
116};
117
typedef struct hammer2_iocom hammer2_iocom_t;

/*
 * Flag bits, presumably for hammer2_iocom.flags (confirm).
 *
 * NOTE(review): WIDLE originally carried the same comment text as WREQ
 * ("request write-avail event"), which looks like a copy/paste slip.  Its
 * actual meaning is not established by this header -- confirm against the
 * code that sets and tests the flag.  SIGNAL was undocumented.
 */
#define HAMMER2_IOCOMF_EOF	0x00000001	/* EOF or ERROR on desc */
#define HAMMER2_IOCOMF_RREQ	0x00000002	/* request read-data event */
#define HAMMER2_IOCOMF_WREQ	0x00000004	/* request write-avail event */
#define HAMMER2_IOCOMF_WIDLE	0x00000008	/* see NOTE(review) above */
#define HAMMER2_IOCOMF_SIGNAL	0x00000010

/***************************************************************************
 *				HIGH LEVEL MESSAGING			   *
 ***************************************************************************
 *
 * NOTE(review): everything in this section is compiled out with #if 0.
 * Several types it references (hammer2_span_t, hammer2_reg_t,
 * hammer2_link_t, hammer2_pmsg_splay_head_t, hammer2_msg_any_t) are not
 * declared in the visible portion of this header, so it cannot simply be
 * enabled as-is.
 */

#if 0

/*
 * The global registration structure consolidates information accumulated
 * via the spanning tree algorithm and tells us which connection (link)
 * is the best path to get to any given registration.
 *
 * glob_node	- Splay entry for this registration in the global index
 *		  of all registrations.
 *
 * glob_entry	- tailq entry when this registration's best_span element
 *		  has changed state.
 *
 * span_list	- Head of a simple list of spanning tree entries which
 *		  we use to determine the best link.
 *
 * best_span	- Which of the span structures on span_list is the best
 *		  one.
 *
 * source_root	- Splay tree root indexing all messages sent from this
 *		  registration.  The messages are indexed by
 *		  {linkid,msgid} XXX
 *
 * target_root	- Splay tree root indexing all messages being sent to
 *		  this registration.  The messages are indexed by
 *		  {linkid,msgid}. XXX
 *
 *
 * Whenever spanning tree data causes a registration's best_span field to
 * change that registration is transmitted as spanning tree data to every
 * active link.  Note that pure clients to the cluster, of which there can
 * be millions, typically do not transmit spanning tree data to each other.
 *
 * Each registration is assigned a unique linkid local to the node (another
 * node might assign a different linkid to the same registration).  This
 * linkid must be persistent as long as messages are active and is used
 * to identify the message source and target.
 */
TAILQ_HEAD(hammer2_span_list, hammer2_span);
typedef struct hammer2_span_list hammer2_span_list_t;

struct hammer2_reg {
	SPLAY_ENTRY(hammer2_reg) glob_node;	/* index of registrations */
	TAILQ_ENTRY(hammer2_reg) glob_entry;	/* when modified */
	hammer2_span_list_t	span_list;	/* list of hammer2_span's */
	hammer2_span_t		*best_span;	/* best span entry */
	hammer2_pmsg_splay_head_t source_root;	/* msgs sent from reg */
	hammer2_pmsg_splay_head_t target_root;	/* msgs sent to reg */
	uuid_t		pfs_id;			/* key field */
	uuid_t		pfs_fsid;		/* key field */
	uint32_t	linkid;
	int		flags;
	int		refs;
};

#define HAMMER2_PROTO_REGF_MODIFIED	0x0001

/*
 * Each link (connection) collects spanning tree data received via the
 * link and stores it in these span structures.
 */
struct hammer2_span {
	TAILQ_ENTRY(hammer2_span) span_entry;	/* from hammer2_reg */
	SPLAY_ENTRY(hammer2_span) span_node;	/* from hammer2_link */
	hammer2_reg_t	*reg;
	hammer2_link_t	*link;
	int		weight;
};

/*
 * Most hammer2 messages represent transactions and have persistent state
 * which must be recorded.  Some messages, such as cache states and inode
 * representations are very long-lasting transactions.
 *
 * Each node in the graph must keep track of the message state in order
 * to perform the proper action when a connection is lost.  To do this
 * the message is indexed on the source and target (global) registration,
 * and the actual span element the message was received on and transmitted
 * to is recorded (allowing us to retrieve the physical links involved).
 *
 * The {source_reg, target_reg, msgid} uniquely identifies a message.  Any
 * streaming operations using the same msgid use the same rendezvous.
 *
 * It is important to note that recorded state must use the same physical
 * link (and thus the same chain of links across the graph) as was 'forged'
 * by the initial message for that msgid.  If the source span a message is
 * received on does not match the recorded source, or the recorded target
 * is no longer routable, the message will be returned or generate an ABORT
 * with LINKFAIL as appropriate.
 */
struct hammer2_pmsg {
	SPLAY_ENTRY(hammer2_pmsg) source_reg;
	SPLAY_ENTRY(hammer2_pmsg) target_reg;
	hammer2_span_t	*source;
	hammer2_span_t	*target;
	uint16_t	msgid;
	void		*aux_data;		/* allocated aux data */
	hammer2_msg_any_t any;			/* dynamically allocated */
};

#endif