2 * Copyright (c) 2006,2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * CCMS - Cache Coherency Management System.
38 * This subsystem can be tied into a VFS in order to supply persistent
39 * cache management state for cluster or for remote cache-coherent operations.
41 * Local and cluster/remote cache state is maintained in a cache-coherent
42 * fashion as well as integrated into the VFS's inode locking subsystem
43 * (as a means of avoiding deadlocks).
45 * To operate properly the VFS must maintain a complete directory topology
46 * leading to any given vnode/inode either open or cached by the system.
47 * The vnode/namecache subsystem does not have to implement this but the
48 * VFS (aka HAMMER2) does.
50 * The filesystem embeds CCMS_CST structures in its internal inode
51 * representation as needed and implements callbacks to allow CCMS to
52 * do topological recursions.
56 * The CCMS_CST structures represent granted cache and local locking states.
57 * Grants can be recursively inherited, minimizing protocol overhead in
58 * situations where there are no conflicts of interest.
62 * CCMS supports active front-end 'locks' on data objects utilizing the
63 * ccms_inode, key, and desired cache state. It can grant the lock based
64 * on inherited CST state and prevents downgrading of the CST by other
65 * parties or threads while the lock is held. The CST's arranged
66 * lock within the embedded CCMS_INODE and ref-counts the related CST.
73 #include <sys/types.h>
76 #include <sys/param.h>
78 #ifndef _SYS_SERIALIZE_H_
79 #include <sys/serialize.h>
81 #ifndef _SYS_SPINLOCK_H_
82 #include <sys/spinlock.h>
/*
 * Basic CCMS scalar types.
 *
 * ccms_key_t   - unsigned 64-bit key; used by the key_beg/key_end range
 *                fields of the lock and CST structures in this header.
 * ccms_tid_t   - unsigned 64-bit id; used by the path_id and tid fields.
 * ccms_state_t - 8-bit cache state (a CCMS_STATE_* value).
 * ccms_type_t  - 8-bit CST type and flags (CCMS_TYPE_* bits).
 */
85 typedef uint64_t ccms_key_t;
86 typedef uint64_t ccms_tid_t;
87 typedef uint8_t ccms_state_t;
88 typedef uint8_t ccms_type_t;
94 * CCMS_STATE_T - CCMS cache states.
96 * INVALID - Cache state is unknown and must be acquired.
98 * ALLOWED - Cache state allows any recursive state to be acquired.
100 * SHARED - Cache state allows shared access. If this is a topo_cst
101 * only INVALID or SHARED recursive states are allowed.
103 * EXCLUSIVE - Cache state allows exclusive access. If this is a
104 * topo_cst then INVALID, SHARED, or EXCLUSIVE recursive
107 * CCMS Implements an extended MESI model. The extensions are implemented
108 * as CCMS_TYPE_T flags.
/*
 * Cache state values held in a ccms_state_t.  The per-state semantics are
 * given in the CCMS_STATE_T comment that precedes these definitions.
 */
110 #define CCMS_STATE_INVALID 0 /* unknown cache state */
111 #define CCMS_STATE_ALLOWED 1 /* allow subsystem (topo only) */
112 #define CCMS_STATE_SHARED 2 /* clean, shared, read-only */
113 #define CCMS_STATE_EXCLUSIVE 3 /* clean, exclusive, read-only */
118 * INHERITED - Indicates the state field was inherited and was not directly
119 * granted by the cluster controller.
121 * MODIFIED - This is a type-field flag associated with an EXCLUSIVE cache
124 * MASTER - This is a type-field flag associated with an EXCLUSIVE+MODIFIED
125 * cache state which indicates that slaves might be present
126 * which are caching our unsynchronized state.
128 * SLAVE - This is a type-field flag associated with the SHARED cache
129 * state which indicates that the data present in our memory
130 * caches is being mastered elsewhere and has not been
131 * synchronized (meaning no quorum protocol has been run to
132 * sync the data yet). Thus only the version of the data in
133 * our memory and its originator is valid.
135 * QSLAVE - This indicates that the slaved data is also present in the
136 * memory caches of a quorum of master nodes.
/*
 * CCMS_TYPE_T flag bits, stored in a ccms_type_t.
 *
 * INHERITED, MODIFIED, MASTER, SLAVE and QSLAVE are described in the
 * comment preceding these definitions.
 *
 * NOTE(review): RECURSIVE is not described in that comment and its
 * meaning cannot be determined from this header -- confirm against the
 * implementation.
 *
 * CCMS_TYPE_QSALVE is the historical misspelling of CCMS_TYPE_QSLAVE.
 * It is retained as an alias so existing users keep compiling; new code
 * should use CCMS_TYPE_QSLAVE.
 */
#define CCMS_TYPE_INHERITED	0x01
#define CCMS_TYPE_MODIFIED	0x02
#define CCMS_TYPE_MASTER	0x04
#define CCMS_TYPE_SLAVE		0x08
#define CCMS_TYPE_QSLAVE	0x10
#define CCMS_TYPE_QSALVE	CCMS_TYPE_QSLAVE	/* deprecated spelling */
#define CCMS_TYPE_RECURSIVE	0x80
146 * CCMS_LOCK - High level active lock
148 * This represents a high level locking request, such as used by
149 * read, write, and attribute operations. Initialize the ccms_lock
150 * structure and call ccms_lock_get().
152 * When a CCMS lock is established the cache state of the underlying elements
153 * is adjusted to meet the requirements of the lock. The cache state
154 * requirements are inferred by the lock type. CCMS locks can block on
155 * third party interactions if the underlying remote cache state is not
158 * CCMS data locks imply a shared CCMS inode lock. A CCMS topology lock does
159 * not imply a data or inode lock but topology locks can have far-reaching
160 * effects such as blocking ccms_locks on multiple inodes.
163 TAILQ_ENTRY(ccms_lock) entry;
168 struct ccms_cst *topo_cst;
169 struct ccms_cst *attr_cst;
170 struct ccms_cst *data_cst;
171 ccms_key_t key_beg; /* applies to dstate */
172 ccms_key_t key_end; /* applies to dstate */
/*
 * NOTE(review): presumably a flag bit associated with struct ccms_lock;
 * the field that carries it is not visible in this view -- confirm
 * against the structure definition.
 */
175 #define CCMS_LOCK_FAILED 0x01
178 * CCMS_CST - Low level locking state, persistent cache state
180 * Offset ranges are byte-inclusive, allowing the entire 64 bit data space
181 * to be represented without overflowing the edge case. For example, a
182 * 64 byte area might be represented as (0,63). The offsets are UNSIGNED
185 * High level CST locks must be obtained top-down.
187 * count - Negative value indicates active exclusive lock, positive value
188 * indicates active shared lock.
190 * spin - Structural spinlock, typically just one is held at a time.
191 * However, to complement the top-down nature of the higher level
192 * lock we allow the spin lock to be held recursively in a bottom-up
193 * fashion for race-to-root flags updates and lastdrop iterations.
196 struct spinlock spin; /* thread spinlock */
197 void *handle; /* opaque VFS handle */
198 ccms_state_t state; /* granted or inherited state */
199 ccms_type_t type; /* CST type and flags */
203 ccms_tid_t path_id; /* rendezvous inode id */
204 ccms_tid_t tid; /* [meta]data versioning id */
205 ccms_key_t key_beg; /* key range (inclusive) */
206 ccms_key_t key_end; /* key range (inclusive) */
208 int32_t upgrade; /* upgrades pending */
209 int32_t count; /* active shared/exclusive count */
210 int32_t blocked; /* wakeup blocked on release */
211 thread_t td; /* if excl lock (count < 0) */
215 * Domain management, contains a pseudo-root for the CCMS topology.
218 int cst_count; /* dynamic cst count */
219 int cst_limit; /* dynamic cst limit */
/*
 * Typedef names for the CCMS structures.  These are the names used by the
 * API prototypes in this header.
 */
222 typedef struct ccms_lock ccms_lock_t;
223 typedef struct ccms_cst ccms_cst_t;
224 typedef struct ccms_domain ccms_domain_t;
/*
 * CCMS API prototypes.  Only the declarations are visible in this header,
 * so the notes below describe the signatures; behavioral remarks are
 * marked as assumptions to be confirmed against the implementation.
 */
/*
 * Domain and CST setup/teardown.
 * NOTE(review): the 'handle' argument is presumably stored in the CST's
 * opaque VFS handle field -- confirm.
 */
231 void ccms_domain_init(ccms_domain_t *dom);
232 void ccms_domain_uninit(ccms_domain_t *dom);
233 void ccms_cst_init(ccms_cst_t *cst, void *handle);
234 void ccms_cst_uninit(ccms_cst_t *cst);
/*
 * Per-CST thread lock operations.  'state' and 'ostate' are ccms_state_t
 * values (CCMS_STATE_*).
 * NOTE(review): the ccms_state_t returned by temp_release() and upgrade()
 * presumably supplies the 'ostate' argument of temp_restore(), downgrade()
 * and unlock_upgraded() -- confirm.
 * NOTE(review): the meaning of the int returned by lock_nonblock() and
 * lock_owned() is not visible here -- confirm.
 */
236 void ccms_thread_lock(ccms_cst_t *cst, ccms_state_t state);
237 int ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state);
238 ccms_state_t ccms_thread_lock_temp_release(ccms_cst_t *cst);
239 void ccms_thread_lock_temp_restore(ccms_cst_t *cst, ccms_state_t ostate);
240 ccms_state_t ccms_thread_lock_upgrade(ccms_cst_t *cst);
241 void ccms_thread_lock_downgrade(ccms_cst_t *cst, ccms_state_t ostate);
242 void ccms_thread_unlock(ccms_cst_t *cst);
243 void ccms_thread_unlock_upgraded(ccms_cst_t *cst, ccms_state_t ostate);
244 /*int ccms_thread_unlock_zero(ccms_cst_t *cst);*/
245 int ccms_thread_lock_owned(ccms_cst_t *cst);
246 void ccms_thread_lock_setown(ccms_cst_t *cst);
/*
 * High level ccms_lock interface.  Per the CCMS_LOCK comment in this
 * header, the caller initializes the ccms_lock structure and then calls
 * ccms_lock_get().
 * NOTE(review): ccms_lock_put() presumably releases a lock obtained with
 * ccms_lock_get() -- confirm.
 */
248 void ccms_lock_get(ccms_lock_t *lock);
249 void ccms_lock_put(ccms_lock_t *lock);