hammer2 - minor network adjustments
[dragonfly.git] / sbin / hammer2 / network.h
CommitLineData
9ab15106
MD
1/*
2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
5cf97ec5
MD
36#include <openssl/rsa.h> /* public/private key functions */
37#include <openssl/pem.h> /* public/private key file load */
38#include <openssl/err.h>
39#include <openssl/evp.h> /* aes_256_cbc functions */
40
62efe6ec
MD
41/***************************************************************************
42 * CRYPTO HANDSHAKE *
43 ***************************************************************************
44 *
45 * The initial public-key exchange is implemented by transmitting a
46 * 512-byte buffer to the other side in a symmetrical fashion. This
47 * buffer contains the following:
48 *
5cf97ec5
MD
49 * (1) A random session key. 512 bits is specified. We use aes_256_cbc()
50 * and initialize the key with the first 256 bits and the iv[] with
51 * the second. Note that the transmitted and received session
52 * keys are XOR'd together to create the session key used for
53 * communications (so even if the verifier is compromised the session
54 * will still be gobbly gook if the public key has not been completely
55 * broken).
62efe6ec
MD
56 *
57 * (2) A verifier to determine that the decode was successful. It encodes
58 * an XOR of each group of 4 bytes from the session key.
59 *
60 * (3) Additional configuration and additional random data.
61 *
62 * - The hammer2 message header magic for endian detect
63 *
64 * - The hammer2 protocol version. The two sides agree on the
65 * smaller of the two.
66 *
67 * - All unused fields (junk*) are filled with random data.
68 *
69 * This structure must be exactly 512 bytes and expects to use 256-byte
70 * RSA keys.
71 */
/*
 * hammer2_handshake - the fixed-size buffer each side transmits during the
 * initial public-key exchange.
 *
 * The hex number leading each field comment is the field's byte offset.
 * The members are laid out so the offsets run from 0x000 to 0x1FF with no
 * gaps (512 bytes total); do not reorder or resize them.
 */
struct hammer2_handshake {
	char		pad1[8];	/* 000 */
	uint16_t	magic;		/* 008 HAMMER2_MSGHDR_MAGIC for endian detect */
	uint16_t	version;	/* 00A hammer2 protocol version */
	uint32_t	flags;		/* 00C protocol extension flags */
	uint8_t		sess[64];	/* 010 512-bit session key */
	uint8_t		verf[16];	/* 050 verifier: XOR of each 4-byte group of sess */
	char		quickmsg[32];	/* 060 reason for connecting */
	char		junk080[128];	/* 080-0FF */
	char		pad2[8];	/* 100-107 */
	char		junk100[256-8];	/* 108-1FF */
};

typedef struct hammer2_handshake hammer2_handshake_t;
86
5cf97ec5
MD
/*
 * Session cipher selection.  HAMMER2_AES_KEY_SIZE is used as the byte length
 * of the key and iv[] arrays; HAMMER2_AES_KEY_MASK is that size minus one.
 */
#define HAMMER2_AES_KEY_SIZE	32
#define HAMMER2_AES_KEY_MASK	(HAMMER2_AES_KEY_SIZE - 1)
#define HAMMER2_AES_TYPE	aes_256_cbc
#define HAMMER2_AES_TYPE_EVP	EVP_aes_256_cbc()

/*
 * The '#' operator only stringifies a parameter of a function-like macro,
 * so an object-like "#define X #Y" does not yield a string.  Two helper
 * levels are needed: the outer one expands HAMMER2_AES_TYPE first, the
 * inner one turns the result into the literal "aes_256_cbc".
 */
#define HAMMER2_AES_STRINGIFY_(x)	#x
#define HAMMER2_AES_STRINGIFY(x)	HAMMER2_AES_STRINGIFY_(x)
#define HAMMER2_AES_TYPE_STR	HAMMER2_AES_STRINGIFY(HAMMER2_AES_TYPE)
92
9ab15106
MD
93/***************************************************************************
94 * LOW LEVEL MESSAGING *
95 ***************************************************************************
96 *
97 * hammer2_msg - A standalone copy of a message, typically referenced by
98 * or embedded in other structures, or used with I/O queues.
99 *
100 * These structures are strictly temporary, so they do not have to be
101 * particularly optimized for size. All possible message headers are
102 * directly embedded (any), and the message may contain a reference
103 * to allocated auxiliary data. The structure is recycled quite often
104 * by a connection.
105 *
106 * This structure is typically not used for storing persistent message
4a2e0eae 107 * state (see hammer2_persist for that).
9ab15106 108 */
4a2e0eae
MD
109struct hammer2_iocom;
110struct hammer2_persist;
111
9ab15106 112struct hammer2_msg {
4a2e0eae
MD
113 struct hammer2_iocom *iocom;
114 struct hammer2_persist *persist;
9ab15106
MD
115 TAILQ_ENTRY(hammer2_msg) entry; /* queue */
116 char *aux_data; /* aux-data if any */
117 int aux_size;
118 int flags;
119 hammer2_any_t any; /* raw extended msg header */
120};
121
122typedef struct hammer2_msg hammer2_msg_t;
123
/* Tail-queue head type for lists of struct hammer2_msg. */
TAILQ_HEAD(hammer2_msg_queue, hammer2_msg);
typedef struct hammer2_msg_queue hammer2_msg_queue_t;

/* NOTE(review): presumably a bit for hammer2_msg.flags - confirm at use sites. */
#define HAMMER2_MSGX_BSWAPPED	0x0001
128
129/*
130 * hammer2_ioq - An embedded component of hammer2_connect, holds state
131 * for the buffering and parsing of incoming and outgoing messages.
132 */
133struct hammer2_ioq {
134 enum { HAMMER2_MSGQ_STATE_HEADER1,
135 HAMMER2_MSGQ_STATE_HEADER2,
136 HAMMER2_MSGQ_STATE_AUXDATA1,
137 HAMMER2_MSGQ_STATE_AUXDATA2,
138 HAMMER2_MSGQ_STATE_ERROR } state;
139 int fifo_beg; /* buffered data */
5cf97ec5 140 int fifo_cdx; /* encrypt/decrypt index */
9ab15106
MD
141 int fifo_end;
142 int hbytes; /* header size */
143 int abytes; /* aux_data size */
5cf97ec5 144 int already; /* aux_data already decrypted */
9ab15106
MD
145 int error;
146 int seq; /* salt sequencer */
147 int msgcount;
5cf97ec5
MD
148 EVP_CIPHER_CTX ctx;
149 char iv[HAMMER2_AES_KEY_SIZE]; /* encrypt or decrypt iv[] */
9ab15106
MD
150 hammer2_msg_t *msg;
151 hammer2_msg_queue_t msgq;
5cf97ec5 152 char buf[HAMMER2_MSGBUF_SIZE]; /* staging buffer */
9ab15106
MD
153};
154
155typedef struct hammer2_ioq hammer2_ioq_t;
156
62efe6ec
MD
/*
 * I/O queue error codes (HAMMER2_IOQ_ERROR_*), numbered 1 through 15.
 */
#define HAMMER2_IOQ_ERROR_SYNC		1	/* bad magic / out of sync */
#define HAMMER2_IOQ_ERROR_EOF		2	/* unexpected EOF */
#define HAMMER2_IOQ_ERROR_SOCK		3	/* read() error on socket */
#define HAMMER2_IOQ_ERROR_FIELD		4	/* invalid field */
#define HAMMER2_IOQ_ERROR_HCRC		5	/* core header crc bad */
#define HAMMER2_IOQ_ERROR_XCRC		6	/* ext header crc bad */
#define HAMMER2_IOQ_ERROR_ACRC		7	/* aux data crc bad */
#define HAMMER2_IOQ_ERROR_STATE		8	/* bad state */
#define HAMMER2_IOQ_ERROR_NOPEER	9	/* bad socket peer */
#define HAMMER2_IOQ_ERROR_NORKEY	10	/* no remote keyfile found */
#define HAMMER2_IOQ_ERROR_NOLKEY	11	/* no local keyfile found */
#define HAMMER2_IOQ_ERROR_KEYXCHGFAIL	12	/* key exchange failed */
#define HAMMER2_IOQ_ERROR_KEYFMT	13	/* key file format problem */
#define HAMMER2_IOQ_ERROR_BADURANDOM	14	/* /dev/urandom is bad */
#define HAMMER2_IOQ_ERROR_MSGSEQ	15	/* message sequence error */

#define HAMMER2_IOQ_MAXIOVEC		16
174
175/*
176 * hammer2_iocom - governs a messaging stream connection
177 */
178struct hammer2_iocom {
179 hammer2_ioq_t ioq_rx;
180 hammer2_ioq_t ioq_tx;
181 hammer2_msg_queue_t freeq; /* free msgs hdr only */
182 hammer2_msg_queue_t freeq_aux; /* free msgs w/aux_data */
183 void (*recvmsg_callback)(struct hammer2_iocom *);
184 void (*sendmsg_callback)(struct hammer2_iocom *);
185 void (*altmsg_callback)(struct hammer2_iocom *);
186 int sock_fd; /* comm socket or pipe */
187 int alt_fd; /* thread signal, tty, etc */
188 int flags;
62efe6ec
MD
189 int rxmisc;
190 int txmisc;
5cf97ec5 191 char sess[HAMMER2_AES_KEY_SIZE]; /* aes_256_cbc key */
9ab15106
MD
192};
193
194typedef struct hammer2_iocom hammer2_iocom_t;
195
/*
 * HAMMER2_IOCOMF_* flag bits; each value is a distinct single bit.
 *
 * NOTE(review): WIDLE carries the same comment text as WREQ, which looks
 * like a copy/paste leftover - confirm WIDLE's intended meaning.
 */
#define HAMMER2_IOCOMF_EOF	0x00000001	/* EOF or ERROR on desc */
#define HAMMER2_IOCOMF_RREQ	0x00000002	/* request read-data event */
#define HAMMER2_IOCOMF_WREQ	0x00000004	/* request write-avail event */
#define HAMMER2_IOCOMF_WIDLE	0x00000008	/* request write-avail event */
#define HAMMER2_IOCOMF_SIGNAL	0x00000010
#define HAMMER2_IOCOMF_CRYPTED	0x00000020	/* encrypt enabled */
9ab15106
MD
202
203/***************************************************************************
204 * HIGH LEVEL MESSAGING *
205 ***************************************************************************
206 *
4a2e0eae 207 * Persistent state is stored via the hammer2_persist structure.
9ab15106 208 */
4a2e0eae
MD
/*
 * hammer2_persist - persistent message state: one 32-bit word for the
 * command direction and one for the reply direction.
 */
struct hammer2_persist {
	uint32_t	lcmd;		/* recent command direction */
	uint32_t	lrep;		/* recent reply direction */
};

typedef struct hammer2_persist hammer2_persist_t;
9ab15106
MD
215
216#if 0
217
218
219
220/*
221 * The global registration structure consolidates information accumulated
222 * via the spanning tree algorithm and tells us which connection (link)
223 * is the best path to get to any given registration.
224 *
225 * glob_node - Splay entry for this registration in the global index
226 * of all registrations.
227 *
228 * glob_entry - tailq entry when this registration's best_span element
229 * has changed state.
230 *
231 * span_list - Head of a simple list of spanning tree entries which
232 * we use to determine the best link.
233 *
234 * best_span - Which of the span structure on span_list is the best
235 * one.
236 *
237 * source_root - Splay tree root indexing all messages sent from this
238 * registration. The messages are indexed by
239 * {linkid,msgid} XXX
240 *
241 * target_root - Splay tree root indexing all messages being sent to
242 * this registration. The messages are indexed by
243 * {linkid,msgid}. XXX
244 *
245 *
246 * Whenever spanning tree data causes a registration's best_link field to
247 * change that registration is transmitted as spanning tree data to every
248 * active link. Note that pure clients to the cluster, of which there can
249 * be millions, typically do not transmit spanning tree data to each other.
250 *
251 * Each registration is assigned a unique linkid local to the node (another
252 * node might assign a different linkid to the same registration). This
253 * linkid must be persistent as long as messages are active and is used
254 * to identify the message source and target.
255 */
/* Tail-queue head type for lists of struct hammer2_span. */
TAILQ_HEAD(hammer2_span_list, hammer2_span);
typedef struct hammer2_span_list hammer2_span_list_t;
258
259struct hammer2_reg {
260 SPLAY_ENTRY(hammer2_reg) glob_node; /* index of registrations */
261 TAILQ_ENTRY(hammer2_reg) glob_entry; /* when modified */
262 hammer2_span_list_t span_list; /* list of hammer2_span's */
263 hammer2_span_t *best_span; /* best span entry */
264 hammer2_pmsg_splay_head_t source_root; /* msgs sent from reg */
265 hammer2_pmsg_splay_head_t target_root; /* msgs sent to reg */
266 uuid_t pfs_id; /* key field */
267 uuid_t pfs_fsid; /* key field */
268 uint32_t linkid;
269 int flags;
270 int refs;
271};
272
273#define HAMMER2_PROTO_REGF_MODIFIED 0x0001
274
275/*
276 * Each link (connection) collects spanning tree data received via the
277 * link and stores it in these span structures.
278 */
279struct hammer2_span {
280 TAILQ_ENTRY(hammer2_span) span_entry; /* from hammer2_reg */
281 SPLAY_ENTRY(hammer2_span) span_node; /* from hammer2_link */
282 hammer2_reg_t *reg;
283 hammer2_link_t *link;
284 int weight;
285};
286
287/*
288 * Most hammer2 messages represent transactions and have persistent state
289 * which must be recorded. Some messages, such as cache states and inode
290 * representations are very long-lasting transactions.
291 *
292 * Each node in the graph must keep track of the message state in order
293 * to perform the proper action when a connection is lost. To do this
294 * the message is indexed on the source and target (global) registration,
295 * and the actual span element the message was received on and transmitted
296 * to is recorded (allowing us to retrieve the physical links involved).
297 *
298 * The {source_reg, target_reg, msgid} uniquely identifies a message. Any
299 * streaming operations using the same msgid use the same rendezvous.
300 *
301 * It is important to note that recorded state must use the same physical
302 * link (and thus the same chain of links across the graph) as was 'forged'
303 * by the initial message for that msgid. If the source span a message is
304 * received on does not match the recorded source, or the recorded target
305 * is no longer routeable, the message will be returned or generate an ABORT
306 * with LINKFAIL as appropriate.
307 */
308struct hammer2_pmsg {
309 SPLAY_ENTRY(hammer2_pmsg) source_reg;
310 SPLAY_ENTRY(hammer2_pmsg) target_reg;
311 hammer2_span_t *source;
312 hammer2_span_t *target;
313 uint16_t msgid;
9ab15106
MD
314};
315
316#endif