hammer2 - userland API / span work
[dragonfly.git] / sys / vfs / hammer2 / hammer2_network.h
CommitLineData
9ab15106
MD
1/*
2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35#ifndef VFS_HAMMER2_NETWORK_H_
36#define VFS_HAMMER2_NETWORK_H_
37
38#ifndef _VFS_HAMMER2_DISK_H_
39#include "hammer2_disk.h"
40#endif
41
42/*
43 * Mesh network protocol structures.
44 *
45 * The mesh is constructed from point-to-point streaming links with varying
8c280d5d
MD
46 * levels of interconnectedness, forming a graph. The spanning tree protocol
47 * running on each node transmits a LNK_SPAN transactional message to the
48 * other end. The protocol collects LNK_SPAN messages from all sources,
32d51501 49 * aggregates them using a shortest-distance-path algorithm, and transmits
8c280d5d
MD
50 * them over each link as well, creating a multiplication within the topology.
51 *
52 * Any node in the graph may transmit a message to any other node by using
53 * the msgid of the LNK_SPAN open transaction as the message's 'linkid'.
54 * This identifies both sides so there is no 'source' and 'target' per se.
55 *
56 * Open transactions are recorded by the source and the target, but not by
57 * intermediate nodes in the route. Streaming protocols are used. If a
58 * span element is lost its transaction will be aborted automatically (even
59 * if other routes to the same target are available), and any related
60 * messages will be aborted. If the span element was chosen for aggregation
61 * this will propagate through the entire topology and thus ultimately reach
62 * the target which used the aggregated span element, but does not
63 * necessarily affect all paths in the topology.
64 *
65 * When a link failure occurs all SPANs related to that link are
66 * transactionally closed. The SPANs are not deleted until closed in
67 * both directions, thus the spanid serves as a placeholder allowing all
68 * in-transit messages being routed over that spanid to be properly thrown
69 * out. Once completely closed the spanid can be reused.
70 *
71 * NOTE: Multiple spans for the same physical {fsid,pfs_fsid} can be
72 * forwarded, allowing concurrency within the topology.
73 *
74 * NOTE: It is important that messages in a lost route be aborted because
75 * the messaging protocol expects serialization over any given route.
76 * Only propagated spans are forwarded as spans to other nodes, so any
77 * given open span transaction will represent a specific path.
78 *
79 * If a portion of the path in the middle of the topology is lost it
80 * will propagate in both directions all the way to the ends that used
81 * it. Intermediate route nodes DO NOT silently re-route messages to
82 * another span. Messages in-flight will meet the updating SPAN and
83 * simply be discarded by intermediate nodes. Ultimately the updating
84 * SPAN reaches all end-points and auto-aborts the open transaction.
85 *
86 * If another path is available the transaction can be instantly
87 * retried.
88 *
89 * NOTE: It is possible to route messages virtually using the msgid of any
90 * open transaction instead of the msgid of a SPAN transaction, but
91 * not recommended and not currently coded.
92 *
93 * NOTE: Both the msgid and the spanid are 64-bit fields and may be populated
94 * with actual memory pointers (which simplifies the end-points).
95 * However, all such identifiers must be indexed as appropriate by the
96 * nodes and verified as being valid before any memory dereference
97 * occurs, for obvious reasons.
9ab15106
MD
98 *
99 * All message responses follow the SAME PATH that the original message
100 * followed, but in reverse. This is an absolute requirement since messages
8c280d5d
MD
101 * expecting replies record persistent state at each hop. Sequencing must
102 * be preserved.
103 *
104 * MESSAGE TRANSACTIONAL STATES
9ab15106
MD
105 *
106 * Message state is handled by the CREATE, DELETE, REPLY, and ABORT
107 * flags. Message state is typically recorded at the end points and
108 * at each hop until a DELETE is received from both sides.
109 *
110 * One-way messages such as those used by spanning tree commands are not
26bf1a36
MD
111 * recorded. These are sent without the CREATE, DELETE, or ABORT flags set.
112 * ABORT is not supported for one-off messages. The REPLY bit can be used
113 * to distinguish between command and status if desired.
114 *
115 * Persistent-state messages are messages which require a reply to be
116 * returned. These messages can also consist of multiple message elements
117 * for the command or reply or both (or neither). The command message
118 * sequence sets CREATE on the first message and DELETE on the last message.
119 * A single message command sets both (CREATE|DELETE). The reply message
120 * sequence works the same way but of course also sets the REPLY bit.
121 *
122 * Persistent-state messages can be aborted by sending a message element
123 * with the ABORT flag set. This flag can be combined with either or both
124 * the CREATE and DELETE flags. When combined with the CREATE flag the
125 * command is treated as non-blocking but still executes. When combined
126 * with the DELETE flag no additional message elements are required.
127 *
128 * ABORT SPECIAL CASE - Mid-stream aborts. A mid-stream abort can be sent
129 * when supported by the sender by sending an ABORT message with neither
130 * CREATE nor DELETE set. This effectively turns the message into a
131 * non-blocking message (but depending on what is being represented can also
132 * cut short prior data elements in the stream).
133 *
134 * ABORT SPECIAL CASE - Abort-after-DELETE. Persistent messages have to be
135 * abortable if the stream/pipe/whatever is lost. In this situation any
136 * forwarding relay needs to unconditionally abort commands and replies that
137 * are still active. This is done by sending an ABORT|DELETE even in
138 * situations where a DELETE has already been sent in that direction. This
139 * is done, for example, when links are in a half-closed state. In this
140 * situation it is possible for the abort request to race a transition to the
141 * fully closed state. ABORT|DELETE messages which race the fully closed
142 * state are expected to be discarded by the other end.
9ab15106 143 *
9ab15106
MD
144 * --
145 *
8c280d5d
MD
146 * All base and extended message headers are 64-byte aligned, and all
147 * transports must support extended message headers up to HAMMER2_MSGHDR_MAX.
148 * Currently we allow extended message headers up to 2048 bytes. Note
149 * that the extended header size is encoded in the 'cmd' field of the header.
9ab15106 150 *
8c280d5d 151 * Any in-band data is padded to a 64-byte alignment and placed directly
9ab15106
MD
152 * after the extended header (after the higher-level cmd/rep structure).
153 * The actual unaligned size of the in-band data is encoded in the aux_bytes
154 * field in this case. Maximum data sizes are negotiated during registration.
155 *
8c280d5d
MD
156 * Auxiliary data can be in-band or out-of-band. In-band data sets aux_descr
157 * equal to 0. Any out-of-band data must be negotiated by the SPAN protocol.
158 *
159 * Auxiliary data, whether in-band or out-of-band, must be at least 64-byte
160 * aligned. The aux_bytes field contains the actual byte-granular length
161 * and not the aligned length.
162 *
163 * hdr_crc is calculated over the entire, ALIGNED extended header. For
164 * the purposes of calculating the crc, the hdr_crc field is 0. That is,
165 * if calculating the crc in HW a 32-bit '0' must be inserted in place of
166 * the hdr_crc field when reading the entire header and compared at the
167 * end (but the actual hdr_crc must be left intact in memory). A simple
168 * counter to replace the field going into the CRC generator does the job
169 * in HW. The CRC endian is based on the magic number field and may have
170 * to be byte-swapped, too (which is also easy to do in HW).
171 *
172 * aux_crc is calculated over the entire, ALIGNED auxiliary data.
173 *
174 * SHARED MEMORY IMPLEMENTATIONS
175 *
176 * Shared-memory implementations typically use a pipe to transmit the extended
177 * message header and shared memory to store any auxiliary data. Auxiliary
178 * data in one-way (non-transactional) messages is typically required to be
179 * inline. CRCs are still recommended and required at the beginning, but
180 * may be negotiated away later.
181 *
182 * MULTI-PATH MESSAGE DUPLICATION
183 *
184 * Redundancy can be negotiated but is not required in the current spec.
185 * Basically you send the same message, with the same msgid, via several
186 * paths to the target. The msgid is the rendezvous. The first copy that
187 * makes it to the target is used, the second is ignored. Similarly for
188 * replies. This can improve performance during span flapping. Only
189 * transactional messages will be serialized. The target might receive
190 * multiple copies of one-way messages in higher protocol layers (potentially
191 * out of order, too).
9ab15106
MD
192 */
193struct hammer2_msg_hdr {
8c280d5d
MD
194 uint16_t magic; /* 00 sanity, synchro, endian */
195 uint16_t reserved02; /* 02 reserved (header size is encoded in cmd) */
196 uint32_t salt; /* 04 random salt helps w/crypto */
197
198 uint64_t msgid; /* 08 message transaction id */
199 uint64_t spanid; /* 10 message routing id or 0 */
200
201 uint32_t cmd; /* 18 flags | cmd | hdr_size / ALIGN */
202 uint32_t aux_crc; /* 1C auxiliary data crc */
203 uint32_t aux_bytes; /* 20 auxiliary data length (bytes) */
204 uint32_t error; /* 24 error code or 0 */
205 uint64_t aux_descr; /* 28 negotiated OOB data descr */
206 uint64_t reserved30; /* 30 */
207 uint32_t reserved38; /* 38 */
208 uint32_t hdr_crc; /* 3C (aligned) extended header crc */
9ab15106
MD
209};
210
211typedef struct hammer2_msg_hdr hammer2_msg_hdr_t;
212
213#define HAMMER2_MSGHDR_MAGIC 0x4832
214#define HAMMER2_MSGHDR_MAGIC_REV 0x3248
215#define HAMMER2_MSGHDR_CRCOFF offsetof(hammer2_msg_hdr_t, salt)
216#define HAMMER2_MSGHDR_CRCBYTES (sizeof(hammer2_msg_hdr_t) - \
217 HAMMER2_MSGHDR_CRCOFF)
218
219/*
220 * Administrative protocol limits.
221 */
8c280d5d
MD
222#define HAMMER2_MSGHDR_MAX 2048 /* <= 65535 */
223#define HAMMER2_MSGAUX_MAX 65536 /* <= 1MB */
9ab15106
MD
224#define HAMMER2_MSGBUF_SIZE (HAMMER2_MSGHDR_MAX * 4)
225#define HAMMER2_MSGBUF_MASK (HAMMER2_MSGBUF_SIZE - 1)
226
227/*
228 * The message (cmd) field also encodes various flags and the total size
229 * of the message header. This allows the protocol processors to validate
230 * persistency and structural settings for every command simply by
231 * switch()ing on the (cmd) field.
232 */
233#define HAMMER2_MSGF_CREATE 0x80000000U /* msg start */
234#define HAMMER2_MSGF_DELETE 0x40000000U /* msg end */
235#define HAMMER2_MSGF_REPLY 0x20000000U /* reply path */
236#define HAMMER2_MSGF_ABORT 0x10000000U /* abort req */
237#define HAMMER2_MSGF_AUXOOB 0x08000000U /* aux-data is OOB */
238#define HAMMER2_MSGF_FLAG2 0x04000000U
239#define HAMMER2_MSGF_FLAG1 0x02000000U
240#define HAMMER2_MSGF_FLAG0 0x01000000U
241
242#define HAMMER2_MSGF_FLAGS 0xFF000000U /* all flags */
243#define HAMMER2_MSGF_PROTOS 0x00F00000U /* all protos */
244#define HAMMER2_MSGF_CMDS 0x000FFF00U /* all cmds */
245#define HAMMER2_MSGF_SIZE 0x000000FFU /* N*64 (hdr size in HAMMER2_MSG_ALIGN units) */
246
247#define HAMMER2_MSGF_CMDSWMASK (HAMMER2_MSGF_CMDS | \
248 HAMMER2_MSGF_SIZE | \
249 HAMMER2_MSGF_PROTOS | \
250 HAMMER2_MSGF_REPLY)
42e2a62e 251
f2e07ffb
MD
252#define HAMMER2_MSGF_BASECMDMASK (HAMMER2_MSGF_CMDS | \
253 HAMMER2_MSGF_SIZE | \
254 HAMMER2_MSGF_PROTOS)
9ab15106 255
42e2a62e
MD
256#define HAMMER2_MSGF_TRANSMASK (HAMMER2_MSGF_CMDS | \
257 HAMMER2_MSGF_SIZE | \
258 HAMMER2_MSGF_PROTOS | \
259 HAMMER2_MSGF_REPLY | \
260 HAMMER2_MSGF_CREATE | \
261 HAMMER2_MSGF_DELETE)
262
9ab15106
MD
263#define HAMMER2_MSG_PROTO_LNK 0x00000000U
264#define HAMMER2_MSG_PROTO_DBG 0x00100000U
9b8b748f
MD
265#define HAMMER2_MSG_PROTO_DOM 0x00200000U
266#define HAMMER2_MSG_PROTO_CAC 0x00300000U
267#define HAMMER2_MSG_PROTO_QRM 0x00400000U
268#define HAMMER2_MSG_PROTO_BLK 0x00500000U
269#define HAMMER2_MSG_PROTO_VOP 0x00600000U
9ab15106
MD
270
271/*
272 * Message command constructors, sans flags
273 */
8c280d5d 274#define HAMMER2_MSG_ALIGN 64
9ab15106
MD
275#define HAMMER2_MSG_ALIGNMASK (HAMMER2_MSG_ALIGN - 1)
276#define HAMMER2_MSG_DOALIGN(bytes) (((bytes) + HAMMER2_MSG_ALIGNMASK) & \
277 ~HAMMER2_MSG_ALIGNMASK)
f2e07ffb 278#define HAMMER2_MSG_HDR_ENCODE(elm) (((uint32_t)sizeof(struct elm) + \
9ab15106
MD
279 HAMMER2_MSG_ALIGNMASK) / \
280 HAMMER2_MSG_ALIGN)
281
282#define HAMMER2_MSG_LNK(cmd, elm) (HAMMER2_MSG_PROTO_LNK | \
283 ((cmd) << 8) | \
284 HAMMER2_MSG_HDR_ENCODE(elm))
285
286#define HAMMER2_MSG_DBG(cmd, elm) (HAMMER2_MSG_PROTO_DBG | \
287 ((cmd) << 8) | \
288 HAMMER2_MSG_HDR_ENCODE(elm))
289
9b8b748f
MD
290#define HAMMER2_MSG_DOM(cmd, elm) (HAMMER2_MSG_PROTO_DOM | \
291 ((cmd) << 8) | \
292 HAMMER2_MSG_HDR_ENCODE(elm))
293
9ab15106
MD
294#define HAMMER2_MSG_CAC(cmd, elm) (HAMMER2_MSG_PROTO_CAC | \
295 ((cmd) << 8) | \
296 HAMMER2_MSG_HDR_ENCODE(elm))
297
298#define HAMMER2_MSG_QRM(cmd, elm) (HAMMER2_MSG_PROTO_QRM | \
299 ((cmd) << 8) | \
300 HAMMER2_MSG_HDR_ENCODE(elm))
301
302#define HAMMER2_MSG_BLK(cmd, elm) (HAMMER2_MSG_PROTO_BLK | \
303 ((cmd) << 8) | \
304 HAMMER2_MSG_HDR_ENCODE(elm))
305
306#define HAMMER2_MSG_VOP(cmd, elm) (HAMMER2_MSG_PROTO_VOP | \
307 ((cmd) << 8) | \
308 HAMMER2_MSG_HDR_ENCODE(elm))
309
310/*
311 * Link layer ops basically talk to just the other side of a direct
312 * connection.
313 *
314 * PAD - One-way message on link-0, ignored by target. Used to
315 * pad message buffers on shared-memory transports. Not
316 * typically used with TCP.
317 *
8c280d5d
MD
318 * PING - One-way message on link-0, keep-alive, run by both sides
319 * typically 1/sec on idle link, link is lost after 10 seconds
320 * of inactivity.
321 *
9b8b748f 322 * AUTH - Authenticate the connection, negotiate administrative
9ab15106
MD
323 * rights & encryption, protocol class, etc. Only PAD and
324 * AUTH messages (not even PING) are accepted until
325 * authentication is complete. This message also identifies
326 * the host.
327 *
8c280d5d
MD
328 * CONN - Enable the SPAN protocol on link-0, possibly also installing
329 * a PFS filter (by cluster id, unique id, and/or wildcarded
330 * name).
9ab15106 331 *
8c280d5d
MD
332 * SPAN - A SPAN transaction on link-0 enables messages to be relayed
333 * to/from a particular cluster node. SPANs are received,
334 * sorted, aggregated, and retransmitted back out across all
335 * applicable connections.
9ab15106
MD
336 *
337 * The leaf protocol also uses this to make a PFS available
338 * to the cluster (e.g. on-mount).
339 */
340#define HAMMER2_LNK_PAD HAMMER2_MSG_LNK(0x000, hammer2_msg_hdr)
341#define HAMMER2_LNK_PING HAMMER2_MSG_LNK(0x001, hammer2_msg_hdr)
342#define HAMMER2_LNK_AUTH HAMMER2_MSG_LNK(0x010, hammer2_lnk_auth)
8c280d5d
MD
343#define HAMMER2_LNK_CONN HAMMER2_MSG_LNK(0x011, hammer2_lnk_conn)
344#define HAMMER2_LNK_SPAN HAMMER2_MSG_LNK(0x012, hammer2_lnk_span)
9ab15106
MD
345#define HAMMER2_LNK_ERROR HAMMER2_MSG_LNK(0xFFF, hammer2_msg_hdr)
346
347/*
8c280d5d
MD
348 * LNK_CONN - Register connection for SPAN (transaction, left open)
349 *
350 * One LNK_CONN transaction may be opened on a stream connection, registering
351 * the connection with the SPAN subsystem and allowing the subsystem to
352 * accept and relay SPANs to this connection.
353 *
354 * The LNK_CONN message may contain a filter, limiting the desirable SPANs.
355 *
356 * This message contains a lot of the same info that a SPAN message contains,
357 * but is not a SPAN. That is, without this message the SPAN subprotocol will
358 * not be executed on the connection, nor is this message a promise that the
359 * sending end is a client or node of a cluster.
360 */
81666e1b
MD
361struct hammer2_lnk_auth {
362 hammer2_msg_hdr_t head;
363 char dummy[64];
364};
365
8c280d5d
MD
366struct hammer2_lnk_conn {
367 hammer2_msg_hdr_t head;
368 uuid_t pfs_clid; /* rendezvous pfs uuid */
369 uuid_t pfs_fsid; /* unique pfs uuid */
370 uint8_t pfs_type; /* peer type */
371 uint8_t reserved01;
372 uint16_t proto_version; /* high level protocol support */
373 uint32_t status; /* status flags */
374 uint8_t reserved02[8];
32d51501 375 int32_t dist; /* span distance */
8c280d5d
MD
376 uint32_t reserved03[15]; /* NOTE(review): sizeof appears to be 432 (assuming 16-byte uuid_t), not a multiple of 64 -- confirm */
377 char label[256]; /* PFS label (can be wildcard) */
378};
379
380typedef struct hammer2_lnk_conn hammer2_lnk_conn_t;
381
382/*
383 * LNK_SPAN - Relay a SPAN (transaction, left open)
9b8b748f
MD
384 *
385 * This message registers a PFS/PFS_TYPE with the other end of the connection,
386 * telling the other end who we are and what we can provide or what we want
387 * to consume. Multiple registrations can be maintained as open transactions
388 * with each one specifying a unique {source} linkid.
389 *
390 * Registrations are sent from {source}=S {1...n} to {target}=0 and maintained
391 * as open transactions. Registrations are also received and maintained as
392 * open transactions, creating a matrix of linkid's.
393 *
394 * While these transactions are open additional transactions can be executed
395 * between any two linkid's {source}=S (registrations we sent) to {target}=T
396 * (registrations we received).
397 *
398 * Closure of any registration transaction will automatically abort any open
399 * transactions using the related linkids. Closure can be initiated
400 * voluntarily from either side with either end issuing a DELETE, or they
401 * can be ABORTed.
402 *
403 * Status updates are performed via the open transaction.
404 *
405 * --
406 *
407 * A registration identifies a node and its various PFS parameters including
408 * the PFS_TYPE. For example, a diskless HAMMER2 client typically identifies
409 * itself as PFSTYPE_CLIENT.
410 *
411 * Any node may serve as a cluster controller, aggregating and passing
412 * on received registrations, but end-points do not have to implement this
413 * ability. Most end-points typically implement a single client-style or
414 * server-style PFS_TYPE and rendezvous at a cluster controller.
415 *
416 * The cluster controller does not aggregate/pass-on all received
417 * registrations. It typically filters what gets passed on based on
418 * what it receives.
419 *
420 * STATUS UPDATES: Status updates use the same structure but typically
421 * only contain incremental changes to pfs_type, with the
422 * label field containing a text status.
423 */
424struct hammer2_lnk_span {
425 hammer2_msg_hdr_t head;
8c280d5d 426 uuid_t pfs_clid; /* rendezvous pfs uuid */
9b8b748f
MD
427 uuid_t pfs_fsid; /* unique pfs uuid */
428 uint8_t pfs_type; /* peer type */
429 uint8_t reserved01;
430 uint16_t proto_version; /* high level protocol support */
431 uint32_t status; /* status flags */
432 uint8_t reserved02[8];
32d51501 433 int32_t dist; /* span distance */
8c280d5d 434 uint32_t reserved03[15]; /* NOTE(review): sizeof appears to be 432 (assuming 16-byte uuid_t), not a multiple of 64 -- confirm */
9b8b748f
MD
435 char label[256]; /* PFS label (can be wildcard) */
436};
437
42e2a62e
MD
438typedef struct hammer2_lnk_span hammer2_lnk_span_t;
439
440#define HAMMER2_SPAN_PROTO_1 1
441
9b8b748f 442/*
9ab15106
MD
443 * Debug layer ops operate on any link
444 *
445 * SHELL - Persist stream, access the debug shell on the target
446 * registration. Multiple shells can be operational.
447 */
448#define HAMMER2_DBG_SHELL HAMMER2_MSG_DBG(0x001, hammer2_dbg_shell)
449
450struct hammer2_dbg_shell {
451 hammer2_msg_hdr_t head;
452};
453typedef struct hammer2_dbg_shell hammer2_dbg_shell_t;
454
455/*
9b8b748f
MD
456 * Domain layer ops operate on any link, link-0 may be used when the
457 * directly connected target is the desired registration.
458 *
459 * (nothing defined)
460 */
461
462/*
9ab15106
MD
463 * Cache layer ops operate on any link, link-0 may be used when the
464 * directly connected target is the desired registration.
465 *
466 * LOCK - Persist state, blockable, abortable.
467 *
468 * Obtain cache state (MODIFIED, EXCLUSIVE, SHARED, or INVAL)
469 * in any of four domains (TREE, INUM, ATTR, DIRENT) for a
470 * particular key relative to cache state already owned.
471 *
472 * TREE - Affects entire sub-tree at the specified element
473 * and will cause existing cache state owned by
474 * other nodes to be adjusted such that the request
475 * can be granted.
476 *
477 * INUM - Only affects inode creation/deletion of an existing
478 * element or a new element, by inumber and/or name.
479 * typically can be held for very long periods of time
480 * (think the vnode cache), directly relates to
481 * hammer2_chain structures representing inodes.
482 *
483 * ATTR - Only affects an inode's attributes, such as
484 * ownership, modes, etc. Used for lookups, chdir,
485 * open, etc. mtime has no effect.
486 *
487 * DIRENT - Only affects an inode's attributes plus the
488 * attributes or names related to any directory entry
489 * directly under this inode (non-recursively). Can
490 * be retained for medium periods of time when doing
491 * directory scans.
492 *
493 * This function may block and can be aborted. You may be
494 * granted cache state that is more broad than the state you
495 * requested (e.g. a different set of domains and/or an element
496 * at a higher layer in the tree). When quorum operations
497 * are used you may have to reconcile these grants to the
498 * lowest common denominator.
499 *
500 * In order to grant your request either you or the target
501 * (or both) may have to obtain a quorum agreement. Deadlock
502 * resolution may be required. When doing it yourself you
503 * will typically maintain an active message to each master
504 * node in the system. You can only grant the cache state
505 * when a quorum of nodes agree.
506 *
507 * The cache state includes transaction id information which
508 * can be used to resolve data requests.
509 */
510#define HAMMER2_CAC_LOCK HAMMER2_MSG_CAC(0x001, hammer2_cac_lock)
511
512/*
513 * Quorum layer ops operate on any link, link-0 may be used when the
514 * directly connected target is the desired registration.
515 *
516 * COMMIT - Persist state, blockable, abortable
517 *
518 * Issue a COMMIT in two phases. A quorum must acknowledge
519 * the operation to proceed to phase-2. Message-update to
520 * proceed to phase-2.
521 */
522#define HAMMER2_QRM_COMMIT HAMMER2_MSG_QRM(0x001, hammer2_qrm_commit)
523
524/*
8c280d5d
MD
525 * NOTE!!!! ALL EXTENDED HEADER STRUCTURES MUST BE 64-BYTE ALIGNED!!!
526 *
9ab15106
MD
527 * General message errors
528 *
529 * 0x00 - 0x1F Local iocomm errors
530 * 0x20 - 0x2F Global errors
531 */
81666e1b 532#define HAMMER2_MSG_ERR_NOSUPP 0x20
9ab15106 533
42e2a62e 534union hammer2_msg_any {
9ab15106
MD
535 char buf[HAMMER2_MSGHDR_MAX];
536 hammer2_msg_hdr_t head;
42e2a62e 537 hammer2_lnk_span_t lnk_span;
8c280d5d 538 hammer2_lnk_conn_t lnk_conn;
9ab15106
MD
539};
540
42e2a62e 541typedef union hammer2_msg_any hammer2_msg_any_t;
9ab15106
MD
542
543#endif