hammer2 - Add peer_type field to LNK_CONN and LNK_SPAN
[dragonfly.git] / sys / vfs / hammer2 / hammer2_network.h
CommitLineData
9ab15106
MD
1/*
2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35#ifndef VFS_HAMMER2_NETWORK_H_
36#define VFS_HAMMER2_NETWORK_H_
37
38#ifndef _VFS_HAMMER2_DISK_H_
39#include "hammer2_disk.h"
40#endif
41
42/*
43 * Mesh network protocol structures.
44 *
10c86c4e
MD
45 * SPAN PROTOCOL
46 *
9ab15106 47 * The mesh is constructed from point-to-point streaming links with varying
10c86c4e
MD
48 * levels of interconnectedness, forming a graph. Termini in the graph
49 * are entities such as a HAMMER2 PFS or a network mount or other types
50 * of nodes.
51 *
52 * The spanning tree protocol runs symmetrically on every node. Each node
53 * transmits a representative LNK_SPAN out all available connections. Nodes
54 * also receive LNK_SPANs from other nodes (obviously), and must aggregate,
55 * reduce, and relay those LNK_SPANs out all available connections, thus
56 * propagating the spanning tree. Any connection failure or topology change
57 * causes changes in the LNK_SPAN propagation.
58 *
59 * Each LNK_SPAN or LNK_SPAN relay represents a virtual circuit for routing
60 * purposes. In addition, each relay is chained in one direction,
61 * representing a 1:N fan-out (i.e. one received LNK_SPAN can be relayed out
62 * multiple connections). In order to be able to route a message via a
63 * LNK_SPAN over a deterministic route THE MESSAGE CAN ONLY FLOW FROM A
64 * REMOTE NODE TOWARDS OUR NODE (N:1 fan-in).
65 *
66 * This supports the requirement that we have both message serialization
67 * and positive feedback if a topology change breaks the chain of VCs
68 * the message is flowing over. A remote node sending a message to us
69 * will get positive feedback that the route was broken and can take suitable
70 * action to terminate the transaction with an error.
71 *
72 * TRANSACTIONAL REPLIES
73 *
74 * However, when we receive a command message from a remote node and we want
75 * to reply to it, we have a problem. We want the remote node to have
76 * positive feedback if our reply fails to make it, but if we use a virtual
77 * circuit based on the remote node's LNK_SPAN to us it will be a DIFFERENT
78 * virtual circuit than the one the remote node used to message us. That's
79 * a problem because it means we have no reliable way to notify the remote
80 * node if we get notified that our reply has failed.
81 *
82 * The solution is to first note the fact that the remote chose an optimal
83 * route to get to us, so the reverse should be true. The reason the VC
84 * might not exist over the same route in the reverse is because there may
85 * be multiple paths available with the same distance metric.
86 *
87 * But this also means that we can adjust the messaging protocols to
88 * propagate a LNK_SPAN from the remote to us WHILE the remote's command
89 * message is being sent to us, and it will not only likely be optimal but
90 * it might also already exist, and it will also guarantee that a reply
91 * failure will propagate back to both sides (because even though each
92 * direction is using a different VC chain, the two chains are still
93 * going along the same path).
94 *
95 * We communicate the return VC by having the relay adjust both the target
96 * and the source fields in the message, rather than just the target, on
97 * each relay. As of when the message gets to us the 'source' field will
98 * represent the VC for the return direction (and of course also identify
99 * the node the message came from).
100 *
101 * This way both sides get positive feedback if a topology change disrupts
102 * the VC for the transaction. We also get one additional guarantee, and
103 * that is no spurious messages. Messages simply die when the VC they are
104 * traveling over is broken, in either direction, simple as that.
105 * It makes managing message transactional states very easy.
8c280d5d
MD
106 *
107 * MESSAGE TRANSACTIONAL STATES
9ab15106
MD
108 *
109 * Message state is handled by the CREATE, DELETE, REPLY, and ABORT
110 * flags. Message state is typically recorded at the end points and
111 * at each hop until a DELETE is received from both sides.
112 *
113 * One-way messages such as those used by spanning tree commands are not
26bf1a36
MD
114 * recorded. These are sent without the CREATE, DELETE, or ABORT flags set.
115 * ABORT is not supported for one-off messages. The REPLY bit can be used
116 * to distinguish between command and status if desired.
117 *
118 * Persistent-state messages are messages which require a reply to be
119 * returned. These messages can also consist of multiple message elements
120 * for the command or reply or both (or neither). The command message
121 * sequence sets CREATE on the first message and DELETE on the last message.
122 * A single message command sets both (CREATE|DELETE). The reply message
123 * sequence works the same way but of course also sets the REPLY bit.
124 *
125 * Persistent-state messages can be aborted by sending a message element
126 * with the ABORT flag set. This flag can be combined with either or both
127 * the CREATE and DELETE flags. When combined with the CREATE flag the
128 * command is treated as non-blocking but still executes. When combined
129 * with the DELETE flag no additional message elements are required.
130 *
131 * ABORT SPECIAL CASE - Mid-stream aborts. A mid-stream abort can be sent
132 * when supported by the sender by sending an ABORT message with neither
133 * CREATE nor DELETE set. This effectively turns the message into a
134 * non-blocking message (but depending on what is being represented can also
135 * cut short prior data elements in the stream).
136 *
137 * ABORT SPECIAL CASE - Abort-after-DELETE. Persistent messages have to be
138 * abortable if the stream/pipe/whatever is lost. In this situation any
139 * forwarding relay needs to unconditionally abort commands and replies that
140 * are still active. This is done by sending an ABORT|DELETE even in
141 * situations where a DELETE has already been sent in that direction. This
142 * is done, for example, when links are in a half-closed state. In this
143 * situation it is possible for the abort request to race a transition to the
144 * fully closed state. ABORT|DELETE messages which race the fully closed
145 * state are expected to be discarded by the other end.
9ab15106 146 *
9ab15106
MD
147 * --
148 *
8c280d5d
MD
149 * All base and extended message headers are 64-byte aligned, and all
150 * transports must support extended message headers up to HAMMER2_MSGHDR_MAX.
151 * Currently we allow extended message headers up to 2048 bytes. Note
152 * that the extended header size is encoded in the 'cmd' field of the header.
9ab15106 153 *
8c280d5d 154 * Any in-band data is padded to a 64-byte alignment and placed directly
9ab15106
MD
155 * after the extended header (after the higher-level cmd/rep structure).
156 * The actual unaligned size of the in-band data is encoded in the aux_bytes
157 * field in this case. Maximum data sizes are negotiated during registration.
158 *
8c280d5d
MD
159 * Auxiliary data can be in-band or out-of-band. In-band data sets aux_descr
160 * equal to 0. Any out-of-band data must be negotiated by the SPAN protocol.
161 *
162 * Auxiliary data, whether in-band or out-of-band, must be at least 64-byte
163 * aligned. The aux_bytes field contains the actual byte-granular length
164 * and not the aligned length.
165 *
166 * hdr_crc is calculated over the entire, ALIGNED extended header. For
167 * the purposes of calculating the crc, the hdr_crc field is 0. That is,
168 * if calculating the crc in HW a 32-bit '0' must be inserted in place of
169 * the hdr_crc field when reading the entire header and compared at the
170 * end (but the actual hdr_crc must be left intact in memory). A simple
171 * counter to replace the field going into the CRC generator does the job
172 * in HW. The CRC endian is based on the magic number field and may have
173 * to be byte-swapped, too (which is also easy to do in HW).
174 *
175 * aux_crc is calculated over the entire, ALIGNED auxiliary data.
176 *
177 * SHARED MEMORY IMPLEMENTATIONS
178 *
179 * Shared-memory implementations typically use a pipe to transmit the extended
180 * message header and shared memory to store any auxiliary data. Auxiliary
181 * data in one-way (non-transactional) messages is typically required to be
182 * inline. CRCs are still recommended and required at the beginning, but
183 * may be negotiated away later.
184 *
185 * MULTI-PATH MESSAGE DUPLICATION
186 *
187 * Redundancy can be negotiated but is not required in the current spec.
188 * Basically you send the same message, with the same msgid, via several
189 * paths to the target. The msgid is the rendezvous. The first copy that
190 * makes it to the target is used, the second is ignored. Similarly for
191 * replies. This can improve performance during span flapping. Only
192 * transactional messages will be serialized. The target might receive
193 * multiple copies of one-way messages in higher protocol layers (potentially
194 * out of order, too).
9ab15106
MD
195 */
/*
 * Base message header.
 *
 * The two hex digits that lead each field comment give the field's byte
 * offset within the structure; the fields below add up to 64 bytes.
 */
struct hammer2_msg_hdr {
	uint16_t	magic;		/* 00 sanity, synchro, endian */
	uint16_t	reserved02;	/* 02 */
	uint32_t	salt;		/* 04 random salt helps w/crypto */

	uint64_t	msgid;		/* 08 message transaction id */
	uint64_t	source;		/* 10 originator or 0 */
	uint64_t	target;		/* 18 destination or 0 */

	uint32_t	cmd;		/* 20 flags | cmd | hdr_size / ALIGN */
	uint32_t	aux_crc;	/* 24 auxiliary data crc */
	uint32_t	aux_bytes;	/* 28 auxiliary data length (bytes) */
	uint32_t	error;		/* 2C error code or 0 */
	uint64_t	aux_descr;	/* 30 negotiated OOB data descr */
	uint32_t	reserved38;	/* 38 */
	uint32_t	hdr_crc;	/* 3C (aligned) extended header crc */
};

typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;

/*
 * HAMMER2_MSGHDR_MAGIC is the value carried in the 'magic' field and
 * HAMMER2_MSGHDR_MAGIC_REV is the same value with its two bytes swapped.
 * NOTE(review): presumably a receiver seeing MAGIC_REV treats the peer as
 * opposite-endian -- confirm against the message reader.
 *
 * HAMMER2_MSGHDR_CRCOFF is the byte offset of 'salt' and
 * HAMMER2_MSGHDR_CRCBYTES is the number of base-header bytes from 'salt'
 * through the end of the structure.
 * NOTE(review): the names suggest the header CRC skips 'magic' and
 * 'reserved02' -- confirm against the CRC code.
 */
#define HAMMER2_MSGHDR_MAGIC		0x4832
#define HAMMER2_MSGHDR_MAGIC_REV	0x3248
#define HAMMER2_MSGHDR_CRCOFF		offsetof(hammer2_msg_hdr_t, salt)
#define HAMMER2_MSGHDR_CRCBYTES		(sizeof(hammer2_msg_hdr_t) -	\
					 HAMMER2_MSGHDR_CRCOFF)
221
/*
 * Administrative protocol limits.
 *
 * HAMMER2_MSGBUF_MASK is HAMMER2_MSGBUF_SIZE - 1, so it is only usable as
 * an index mask while HAMMER2_MSGBUF_SIZE remains a power of two.
 *
 * NOTE(review): the header size is carried in the low 8 bits of 'cmd' in
 * units of the message alignment, which looks like a tighter bound on
 * HAMMER2_MSGHDR_MAX than the 65535 quoted below -- confirm.
 */
#define HAMMER2_MSGHDR_MAX		2048	/* <= 65535 */
#define HAMMER2_MSGAUX_MAX		65536	/* <= 1MB */
#define HAMMER2_MSGBUF_SIZE		(HAMMER2_MSGHDR_MAX * 4)
#define HAMMER2_MSGBUF_MASK		(HAMMER2_MSGBUF_SIZE - 1)
229
/*
 * The message (cmd) field also encodes various flags and the total size
 * of the message header.  This allows the protocol processors to validate
 * persistency and structural settings for every command simply by
 * switch()ing on the (cmd) field.
 *
 * Layout of the 32-bit cmd word:
 *
 *	bits 31-24	flags		(HAMMER2_MSGF_FLAGS)
 *	bits 23-20	protocol	(HAMMER2_MSGF_PROTOS)
 *	bits 19-8	command		(HAMMER2_MSGF_CMDS)
 *	bits  7-0	header size	(HAMMER2_MSGF_SIZE)
 */
#define HAMMER2_MSGF_CREATE		0x80000000U	/* msg start */
#define HAMMER2_MSGF_DELETE		0x40000000U	/* msg end */
#define HAMMER2_MSGF_REPLY		0x20000000U	/* reply path */
#define HAMMER2_MSGF_ABORT		0x10000000U	/* abort req */
#define HAMMER2_MSGF_AUXOOB		0x08000000U	/* aux-data is OOB */
#define HAMMER2_MSGF_FLAG2		0x04000000U
#define HAMMER2_MSGF_FLAG1		0x02000000U
#define HAMMER2_MSGF_FLAG0		0x01000000U

#define HAMMER2_MSGF_FLAGS		0xFF000000U	/* all flags */
#define HAMMER2_MSGF_PROTOS		0x00F00000U	/* all protos */
#define HAMMER2_MSGF_CMDS		0x000FFF00U	/* all cmds */
#define HAMMER2_MSGF_SIZE		0x000000FFU	/* N*64 (N alignment units) */

/* Command identity plus the REPLY bit; CREATE/DELETE/ABORT are excluded. */
#define HAMMER2_MSGF_CMDSWMASK		(HAMMER2_MSGF_CMDS |	\
					 HAMMER2_MSGF_SIZE |	\
					 HAMMER2_MSGF_PROTOS |	\
					 HAMMER2_MSGF_REPLY)

/* Command identity only: protocol, command and header size, no flags. */
#define HAMMER2_MSGF_BASECMDMASK	(HAMMER2_MSGF_CMDS |	\
					 HAMMER2_MSGF_SIZE |	\
					 HAMMER2_MSGF_PROTOS)

/* Command identity plus the REPLY, CREATE and DELETE bits. */
#define HAMMER2_MSGF_TRANSMASK		(HAMMER2_MSGF_CMDS |	\
					 HAMMER2_MSGF_SIZE |	\
					 HAMMER2_MSGF_PROTOS |	\
					 HAMMER2_MSGF_REPLY |	\
					 HAMMER2_MSGF_CREATE |	\
					 HAMMER2_MSGF_DELETE)
265
9ab15106
MD
/*
 * Protocol identifiers, positioned in bits 23-20 of the cmd word.
 */
#define HAMMER2_MSG_PROTO_LNK		0x00000000U
#define HAMMER2_MSG_PROTO_DBG		0x00100000U
#define HAMMER2_MSG_PROTO_DOM		0x00200000U
#define HAMMER2_MSG_PROTO_CAC		0x00300000U
#define HAMMER2_MSG_PROTO_QRM		0x00400000U
#define HAMMER2_MSG_PROTO_BLK		0x00500000U
#define HAMMER2_MSG_PROTO_VOP		0x00600000U

/*
 * Message command constructors, sans flags
 *
 * HAMMER2_MSG_DOALIGN(bytes) rounds a byte count up to the next multiple
 * of HAMMER2_MSG_ALIGN.
 *
 * HAMMER2_MSG_HDR_ENCODE(elm) yields sizeof(struct elm) rounded up and
 * expressed in HAMMER2_MSG_ALIGN units; 'elm' is a struct tag, written
 * without the 'struct' keyword.
 */
#define HAMMER2_MSG_ALIGN		64
#define HAMMER2_MSG_ALIGNMASK		(HAMMER2_MSG_ALIGN - 1)
#define HAMMER2_MSG_DOALIGN(bytes)	(((bytes) + HAMMER2_MSG_ALIGNMASK) & \
					 ~HAMMER2_MSG_ALIGNMASK)
#define HAMMER2_MSG_HDR_ENCODE(elm)	(((uint32_t)sizeof(struct elm) + \
					  HAMMER2_MSG_ALIGNMASK) /	\
					 HAMMER2_MSG_ALIGN)

/*
 * Shared body of the per-protocol constructors below: protocol id, the
 * command number shifted into bits 19-8, and the encoded header size.
 * The command is widened to uint32_t before shifting so the shift is
 * always performed on an unsigned value.
 */
#define HAMMER2_MSG_CMD_ENCODE(proto, cmd, elm)				\
					((proto) |			\
					 ((uint32_t)(cmd) << 8) |	\
					 HAMMER2_MSG_HDR_ENCODE(elm))

#define HAMMER2_MSG_LNK(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_LNK, cmd, elm)

#define HAMMER2_MSG_DBG(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_DBG, cmd, elm)

#define HAMMER2_MSG_DOM(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_DOM, cmd, elm)

#define HAMMER2_MSG_CAC(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_CAC, cmd, elm)

#define HAMMER2_MSG_QRM(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_QRM, cmd, elm)

#define HAMMER2_MSG_BLK(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_BLK, cmd, elm)

#define HAMMER2_MSG_VOP(cmd, elm)	\
		HAMMER2_MSG_CMD_ENCODE(HAMMER2_MSG_PROTO_VOP, cmd, elm)
312
313/*
314 * Link layer ops basically talk to just the other side of a direct
315 * connection.
316 *
1a34728c 317 * LNK_PAD - One-way message on link-0, ignored by target. Used to
9ab15106
MD
318 * pad message buffers on shared-memory transports. Not
319 * typically used with TCP.
320 *
1a34728c 321 * LNK_PING - One-way message on link-0, keep-alive, run by both sides
8c280d5d
MD
322 * typically 1/sec on idle link, link is lost after 10 seconds
323 * of inactivity.
324 *
1a34728c 325 * LNK_AUTH - Authenticate the connection, negotiate administrative
9ab15106
MD
326 * rights & encryption, protocol class, etc. Only PAD and
327 * AUTH messages (not even PING) are accepted until
328 * authentication is complete. This message also identifies
329 * the host.
330 *
1a34728c 331 * LNK_CONN - Enable the SPAN protocol on link-0, possibly also installing
8c280d5d
MD
332 * a PFS filter (by cluster id, unique id, and/or wildcarded
333 * name).
9ab15106 334 *
1a34728c 335 * LNK_SPAN - A SPAN transaction on link-0 enables messages to be relayed
8c280d5d
MD
336 * to/from a particular cluster node. SPANs are received,
337 * sorted, aggregated, and retransmitted back out across all
338 * applicable connections.
9ab15106
MD
339 *
340 * The leaf protocol also uses this to make a PFS available
341 * to the cluster (e.g. on-mount).
1a34728c
MD
342 *
343 * LNK_VOLCONF - Volume header configuration change. All hammer2
344 * connections (hammer2 connect ...) stored in the volume
345 * header are spammed at the link level to the hammer2
346 * service daemon, and any live configuration change
347 * thereafter.
9ab15106
MD
348 */
/*
 * Link-layer command words.  Each one combines the LNK protocol, a
 * command number, and the encoded size of the structure named as the
 * second argument.  PAD, PING and ERROR name the base message header.
 */
#define HAMMER2_LNK_PAD		HAMMER2_MSG_LNK(0x000, hammer2_msg_hdr)
#define HAMMER2_LNK_PING	HAMMER2_MSG_LNK(0x001, hammer2_msg_hdr)
#define HAMMER2_LNK_AUTH	HAMMER2_MSG_LNK(0x010, hammer2_lnk_auth)
#define HAMMER2_LNK_CONN	HAMMER2_MSG_LNK(0x011, hammer2_lnk_conn)
#define HAMMER2_LNK_SPAN	HAMMER2_MSG_LNK(0x012, hammer2_lnk_span)
#define HAMMER2_LNK_VOLCONF	HAMMER2_MSG_LNK(0x020, hammer2_lnk_volconf)
#define HAMMER2_LNK_ERROR	HAMMER2_MSG_LNK(0xFFF, hammer2_msg_hdr)
356
357/*
8c280d5d
MD
358 * LNK_CONN - Register connection for SPAN (transaction, left open)
359 *
360 * One LNK_CONN transaction may be opened on a stream connection, registering
361 * the connection with the SPAN subsystem and allowing the subsystem to
362 * accept and relay SPANs to this connection.
363 *
364 * The LNK_CONN message may contain a filter, limiting the desirable SPANs.
365 *
366 * This message contains a lot of the same info that a SPAN message contains,
367 * but is not a SPAN. That is, without this message the SPAN subprotocol will
368 * not be executed on the connection, nor is this message a promise that the
369 * sending end is a client or node of a cluster.
370 */
81666e1b
MD
371struct hammer2_lnk_auth {
372 hammer2_msg_hdr_t head;
373 char dummy[64];
374};
375
2063f4d7
MD
376/*
377 * LNK_CONN identifies a streaming connection into the cluster. The other
378 * fields serve as a filter when supported for a particular peer and are
379 * not necessarily all used.
380 *
381 * peer_mask serves to filter the SPANs we receive by peer. A cluster
382 * controller typically sets this to (uint64_t)-1, a block devfs
383 * interface might set it to 1 << HAMMER2_PEER_DISK, and a hammer2
384 * mount might set it to 1 << HAMMER2_PEER_HAMMER2.
385 *
386 * mediaid allows multiple (e.g. HAMMER2) connections belonging to the same
387 * media, in terms of LNK_VOLCONF updates.
388 *
389 * pfs_clid, pfs_fsid, pfs_type, and label are peer-specific and must be
390 * left empty (zero-fill) if not supported by a particular peer.
391 *
392 * HAMMER2_PEER_CLUSTER filter: none
393 * HAMMER2_PEER_DISK filter: label
394 * HAMMER2_PEER_HAMMER2 filter: pfs_clid if not empty, and label
395 */
8c280d5d
MD
396struct hammer2_lnk_conn {
397 hammer2_msg_hdr_t head;
1a34728c 398 uuid_t mediaid; /* media configuration id */
8c280d5d
MD
399 uuid_t pfs_clid; /* rendezvous pfs uuid */
400 uuid_t pfs_fsid; /* unique pfs uuid */
2063f4d7
MD
401 uint64_t peer_mask; /* PEER mask for SPAN filtering */
402 uint8_t peer_type; /* see HAMMER2_PEER_xxx */
403 uint8_t pfs_type; /* pfs type */
8c280d5d
MD
404 uint16_t proto_version; /* high level protocol support */
405 uint32_t status; /* status flags */
406 uint8_t reserved02[8];
32d51501 407 int32_t dist; /* span distance */
2063f4d7 408 uint32_t reserved03[14];
8c280d5d
MD
409 char label[256]; /* PFS label (can be wildcard) */
410};
411
412typedef struct hammer2_lnk_conn hammer2_lnk_conn_t;
413
414/*
415 * LNK_SPAN - Relay a SPAN (transaction, left open)
9b8b748f
MD
416 *
417 * This message registers a PFS/PFS_TYPE with the other end of the connection,
418 * telling the other end who we are and what we can provide or what we want
419 * to consume. Multiple registrations can be maintained as open transactions
420 * with each one specifying a unique {source} linkid.
421 *
422 * Registrations are sent from {source}=S {1...n} to {target}=0 and maintained
423 * as open transactions. Registrations are also received and maintained as
424 * open transactions, creating a matrix of linkid's.
425 *
426 * While these transactions are open additional transactions can be executed
427 * between any two linkid's {source}=S (registrations we sent) to {target}=T
428 * (registrations we received).
429 *
430 * Closure of any registration transaction will automatically abort any open
431 * transactions using the related linkids. Closure can be initiated
432 * voluntarily from either side with either end issuing a DELETE, or they
433 * can be ABORTed.
434 *
435 * Status updates are performed via the open transaction.
436 *
437 * --
438 *
439 * A registration identifies a node and its various PFS parameters including
440 * the PFS_TYPE. For example, a diskless HAMMER2 client typically identifies
441 * itself as PFSTYPE_CLIENT.
442 *
443 * Any node may serve as a cluster controller, aggregating and passing
444 * on received registrations, but end-points do not have to implement this
445 * ability. Most end-points typically implement a single client-style or
446 * server-style PFS_TYPE and rendezvous at a cluster controller.
447 *
448 * The cluster controller does not aggregate/pass-on all received
449 * registrations. It typically filters what gets passed on based on
450 * what it receives.
451 *
452 * STATUS UPDATES: Status updates use the same structure but typically
453 * only contain incremental changes to pfs_type, with the
454 * label field containing a text status.
455 */
456struct hammer2_lnk_span {
457 hammer2_msg_hdr_t head;
8c280d5d 458 uuid_t pfs_clid; /* rendezvous pfs uuid */
9b8b748f 459 uuid_t pfs_fsid; /* unique pfs uuid */
2063f4d7
MD
460 uint8_t pfs_type; /* PFS type */
461 uint8_t peer_type; /* PEER type */
9b8b748f
MD
462 uint16_t proto_version; /* high level protocol support */
463 uint32_t status; /* status flags */
464 uint8_t reserved02[8];
32d51501 465 int32_t dist; /* span distance */
8c280d5d 466 uint32_t reserved03[15];
9b8b748f
MD
467 char label[256]; /* PFS label (can be wildcard) */
468};
469
42e2a62e
MD
470typedef struct hammer2_lnk_span hammer2_lnk_span_t;
471
472#define HAMMER2_SPAN_PROTO_1 1
473
9b8b748f 474/*
1a34728c
MD
475 * LNK_VOLCONF
476 */
477struct hammer2_lnk_volconf {
478 hammer2_msg_hdr_t head;
479 hammer2_copy_data_t copy; /* copy spec */
480 int32_t index;
481 int32_t unused01;
482 uuid_t mediaid;
483 int64_t reserved02[32];
484};
485
486typedef struct hammer2_lnk_volconf hammer2_lnk_volconf_t;
487
488/*
9ab15106
MD
489 * Debug layer ops operate on any link
490 *
491 * SHELL - Persist stream, access the debug shell on the target
492 * registration. Multiple shells can be operational.
493 */
494#define HAMMER2_DBG_SHELL HAMMER2_MSG_DBG(0x001, hammer2_dbg_shell)
495
496struct hammer2_dbg_shell {
497 hammer2_msg_hdr_t head;
498};
499typedef struct hammer2_dbg_shell hammer2_dbg_shell_t;
500
501/*
9b8b748f
MD
502 * Domain layer ops operate on any link, link-0 may be used when the
503 * directly connected target is the desired registration.
504 *
505 * (nothing defined)
506 */
507
508/*
9ab15106
MD
509 * Cache layer ops operate on any link, link-0 may be used when the
510 * directly connected target is the desired registration.
511 *
512 * LOCK - Persist state, blockable, abortable.
513 *
514 * Obtain cache state (MODIFIED, EXCLUSIVE, SHARED, or INVAL)
515 * in any of four domains (TREE, INUM, ATTR, DIRENT) for a
516 * particular key relative to cache state already owned.
517 *
518 * TREE - Affects the entire sub-tree at the specified element
519 * and will cause existing cache state owned by
520 * other nodes to be adjusted such that the request
521 * can be granted.
522 *
523 * INUM - Only affects inode creation/deletion of an existing
524 * element or a new element, by inumber and/or name.
525 * typically can be held for very long periods of time
526 * (think the vnode cache), directly relates to
527 * hammer2_chain structures representing inodes.
528 *
529 * ATTR - Only affects an inode's attributes, such as
530 * ownership, modes, etc. Used for lookups, chdir,
531 * open, etc. mtime has no effect.
532 *
533 * DIRENT - Only affects an inode's attributes plus the
534 * attributes or names related to any directory entry
535 * directly under this inode (non-recursively). Can
536 * be retained for medium periods of time when doing
537 * directory scans.
538 *
539 * This function may block and can be aborted. You may be
540 * granted cache state that is more broad than the state you
541 * requested (e.g. a different set of domains and/or an element
542 * at a higher layer in the tree). When quorum operations
543 * are used you may have to reconcile these grants to the
544 * lowest common denominator.
545 *
546 * In order to grant your request either you or the target
547 * (or both) may have to obtain a quorum agreement. Deadlock
548 * resolution may be required. When doing it yourself you
549 * will typically maintain an active message to each master
550 * node in the system. You can only grant the cache state
551 * when a quorum of nodes agree.
552 *
553 * The cache state includes transaction id information which
554 * can be used to resolve data requests.
555 */
/*
 * Command word for the cache-layer LOCK op.  No definition of
 * struct hammer2_cac_lock appears in this file, so the macro can only be
 * expanded where that structure has been declared.
 */
#define HAMMER2_CAC_LOCK	HAMMER2_MSG_CAC(0x001, hammer2_cac_lock)
557
/*
 * Quorum layer ops operate on any link, link-0 may be used when the
 * directly connected target is the desired registration.
 *
 * COMMIT	- Persist state, blockable, abortable
 *
 *		  Issue a COMMIT in two phases.  A quorum must acknowledge
 *		  the operation to proceed to phase-2.  Message-update to
 *		  proceed to phase-2.
 *
 * No definition of struct hammer2_qrm_commit appears in this file, so the
 * macro can only be expanded where that structure has been declared.
 */
#define HAMMER2_QRM_COMMIT	HAMMER2_MSG_QRM(0x001, hammer2_qrm_commit)
569
/*
 * NOTE!!!! ALL EXTENDED HEADER STRUCTURES MUST BE 64-BYTE ALIGNED!!!
 *
 * General message errors
 *
 *	0x00 - 0x1F	Local iocomm errors
 *	0x20 - 0x2F	Global errors
 */
#define HAMMER2_MSG_ERR_NOSUPP		0x20
9ab15106 579
42e2a62e 580union hammer2_msg_any {
9ab15106
MD
581 char buf[HAMMER2_MSGHDR_MAX];
582 hammer2_msg_hdr_t head;
42e2a62e 583 hammer2_lnk_span_t lnk_span;
8c280d5d 584 hammer2_lnk_conn_t lnk_conn;
1a34728c 585 hammer2_lnk_volconf_t lnk_volconf;
9ab15106
MD
586};
587
42e2a62e 588typedef union hammer2_msg_any hammer2_msg_any_t;
9ab15106
MD
589
590#endif