hammer2 - Refactor frontend part 15/many
[dragonfly.git] / sys / vfs / hammer2 / hammer2_cluster.c
1 /*
2  * Copyright (c) 2013-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * The cluster module collects multiple chains representing the same
36  * information from different nodes into a single entity.  It allows direct
37  * access to media data as long as it is not blockref array data (which
38  * will obviously have to be different at each node).
39  *
40  * This module also handles I/O dispatch, status rollup, and various
41  * mastership arrangements including quorum operations.  It effectively
42  * presents one topology to the vnops layer.
43  *
44  * Many of the API calls mimic chain API calls but operate on clusters
45  * instead of chains.  Please see hammer2_chain.c for more complete code
46  * documentation of the API functions.
47  *
48  * WARNING! This module is *extremely* complex.  It must issue asynchronous
49  *          locks and I/O, do quorum and/or master-slave processing, and
50  *          it must operate properly even if some nodes are broken (which
51  *          can also mean indefinite locks).
52  *
53  *                              CLUSTER OPERATIONS
54  *
55  * Cluster operations can be broken down into three pieces:
56  *
57  * (1) Chain locking and data retrieval.
58  *              hammer2_cluster_lock()
59  *              hammer2_cluster_parent()
60  *
61  *      - Most complex functions, quorum management on transaction ids.
62  *
63  *      - Locking and data accesses must be internally asynchronous.
64  *
65  *      - Validate and manage cache coherency primitives (cache state
66  *        is stored in chain topologies but must be validated by these
67  *        functions).
68  *
69  * (2) Lookups and Scans
70  *              hammer2_cluster_lookup()
71  *              hammer2_cluster_next()
72  *
73  *      - Depend on locking & data retrieval functions, but still complex.
74  *
75  *      - Must do quorum management on transaction ids.
76  *
77  *      - Lookup and iteration ops must be internally asynchronous.
78  *
79  * (3) Modifying Operations
80  *              hammer2_cluster_create()
81  *              hammer2_cluster_rename()
82  *              hammer2_cluster_delete()
83  *              hammer2_cluster_modify()
84  *              hammer2_cluster_modsync()
85  *
86  *      - Can usually punt on failures, operation continues unless quorum
87  *        is lost.  If quorum is lost, must wait for resynchronization
88  *        (depending on the management mode).
89  *
90  *      - Must disconnect node on failures (also not flush), remount, and
91  *        resynchronize.
92  *
93  *      - Network links (via kdmsg) are relatively easy to issue as the
94  *        complex underworkings of hammer2_chain.c don't have to be messed
95  *        with (the protocol is at a higher level than block-level).
96  *
97  *      - Multiple local disk nodes (i.e. block devices) are another matter.
98  *        Chain operations have to be dispatched to per-node threads (xN)
99  *        because we can't asynchronize potentially very complex chain
100  *        operations in hammer2_chain.c (it would be a huge mess).
101  *
102  *        (these threads are also used to terminate incoming kdmsg ops from
103  *        other machines).
104  *
105  *      - Single-node filesystems do not use threads and will simply call
106  *        hammer2_chain.c functions directly.  This short-cut is handled
107  *        at the base of each cluster function.
108  */
109 #include <sys/cdefs.h>
110 #include <sys/param.h>
111 #include <sys/systm.h>
112 #include <sys/types.h>
113 #include <sys/lock.h>
114 #include <sys/uuid.h>
115
116 #include "hammer2.h"
117
118 /*
119  * Returns the bref type of the cluster's foucs.
120  *
121  * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
122  * The cluster must be locked.
123  */
124 uint8_t
125 hammer2_cluster_type(hammer2_cluster_t *cluster)
126 {
127         if (cluster->error == 0) {
128                 KKASSERT(cluster->focus != NULL);
129                 return(cluster->focus->bref.type);
130         }
131         return 0;
132 }
133
134 /*
135  * Returns non-zero if the cluster's focus is flagged as being modified.
136  *
137  * If the cluster is errored, returns 0.
138  */
139 static
140 int
141 hammer2_cluster_modified(hammer2_cluster_t *cluster)
142 {
143         if (cluster->error == 0) {
144                 KKASSERT(cluster->focus != NULL);
145                 return((cluster->focus->flags & HAMMER2_CHAIN_MODIFIED) != 0);
146         }
147         return 0;
148 }
149
150 /*
151  * Returns the bref of the cluster's focus, sans any data-offset information
152  * (since offset information is per-node and wouldn't be useful).
153  *
154  * Callers use this function to access modify_tid, mirror_tid, type,
155  * key, and keybits.
156  *
157  * If the cluster is errored, returns an empty bref.
158  * The cluster must be locked.
159  */
160 void
161 hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
162 {
163         if (cluster->error == 0) {
164                 KKASSERT(cluster->focus != NULL);
165                 *bref = cluster->focus->bref;
166                 bref->data_off = 0;
167         } else {
168                 bzero(bref, sizeof(*bref));
169         }
170 }
171
172 /*
173  * Set the check mode for the cluster.
174  * Errored elements of the cluster are ignored.
175  *
176  * The cluster must be locked and modified.
177  */
178 void
179 hammer2_cluster_setmethod_check(hammer2_cluster_t *cluster, int check_algo)
180 {
181         hammer2_chain_t *chain;
182         int i;
183
184         KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
185         for (i = 0; i < cluster->nchains; ++i) {
186                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0) {
187                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
188                         continue;
189                 }
190                 chain = cluster->array[i].chain;
191                 if (chain == NULL)
192                         continue;
193                 if (chain->error)
194                         continue;
195                 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
196                 chain->bref.methods &= ~HAMMER2_ENC_CHECK(-1);
197                 chain->bref.methods |= HAMMER2_ENC_CHECK(check_algo);
198         }
199 }
200
201 /*
202  * Create a degenerate cluster with one ref from a single locked chain.
203  * The returned cluster will be focused on the chain and inherit its
204  * error state.
205  *
206  * The chain's lock and reference are transfered to the new cluster, so
207  * the caller should not try to unlock the chain separately.
208  *
209  * We fake the flags.
210  */
211 hammer2_cluster_t *
212 hammer2_cluster_from_chain(hammer2_chain_t *chain)
213 {
214         hammer2_cluster_t *cluster;
215
216         cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
217         cluster->array[0].chain = chain;
218         cluster->array[0].flags = HAMMER2_CITEM_FEMOD;
219         cluster->nchains = 1;
220         cluster->focus = chain;
221         cluster->focus_index = 0;
222         cluster->pmp = chain->pmp;
223         cluster->refs = 1;
224         cluster->error = chain->error;
225         cluster->flags = HAMMER2_CLUSTER_LOCKED |
226                          HAMMER2_CLUSTER_WRHARD |
227                          HAMMER2_CLUSTER_RDHARD |
228                          HAMMER2_CLUSTER_MSYNCED |
229                          HAMMER2_CLUSTER_SSYNCED;
230
231         return cluster;
232 }
233
234 /*
235  * Add a reference to the cluster by atomically incrementing cluster->refs.
236  *
237  * Only the cluster's own count is adjusted here; this function does not
238  * touch the chains held in cluster->array[].
239  */
240 void
241 hammer2_cluster_ref(hammer2_cluster_t *cluster)
242 {
243         atomic_add_int(&cluster->refs, 1);
244 }
245
246 /*
247  * Drop the caller's reference to the cluster.  When the ref count drops to
248  * zero this function frees the cluster and drops all underlying chains.
249  *
250  * In-progress read I/Os are typically detached from the cluster once the
251  * first one returns (the remaining stay attached to the DIOs but are then
252  * ignored and drop naturally).
253  */
254 void
255 hammer2_cluster_drop(hammer2_cluster_t *cluster)
256 {
257         hammer2_chain_t *chain;
258         int i;
259
260         KKASSERT(cluster->refs > 0);
261         if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
262                 cluster->focus = NULL;          /* safety XXX chg to assert */
263                 cluster->focus_index = 0;
264
265                 for (i = 0; i < cluster->nchains; ++i) {
266                         chain = cluster->array[i].chain;
267                         if (chain) {
268                                 hammer2_chain_drop(chain);
269                                 cluster->array[i].chain = NULL; /* safety */
270                         }
271                 }
272                 cluster->nchains = 0;                           /* safety */
273
274                 kfree(cluster, M_HAMMER2);
275                 /* cluster is invalid */
276         }
277 }
278
279 /*
280  * Lock a cluster.  Cluster must already be referenced.  Focus is maintained. 
281  *
282  * WARNING! This function expects the caller to handle resolution of the
283  *          cluster.  We never re-resolve the cluster in this function,
284  *          because it might be used to temporarily unlock/relock a cparent
285  *          in an iteration or recursrion, and the cparents elements do not
286  *          necessarily match.
287  */
288 void
289 hammer2_cluster_lock_except(hammer2_cluster_t *cluster, int idx, int how)
290 {
291         hammer2_chain_t *chain;
292         int i;
293
294         /* cannot be on inode-embedded cluster template, must be on copy */
295         KKASSERT(cluster->refs > 0);
296         KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0);
297         if (cluster->flags & HAMMER2_CLUSTER_LOCKED) {
298                 panic("hammer2_cluster_lock: cluster %p already locked!\n",
299                         cluster);
300         }
301         atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
302
303         /*
304          * Lock chains and resolve state.
305          */
306         for (i = 0; i < cluster->nchains; ++i) {
307                 if (i == idx)
308                         continue;
309                 chain = cluster->array[i].chain;
310                 if (chain == NULL)
311                         continue;
312                 hammer2_chain_lock(chain, how);
313         }
314 }
315
316 void
317 hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
318 {
319         hammer2_cluster_lock_except(cluster, -1, how);
320 }
321
322 /*
323  * Calculate the clustering state for the cluster and set its focus.
324  * This routine must be called with care.  For example, it should not
325  * normally be called after relocking a non-leaf cluster because parent
326  * clusters help iterations and each element might be at a slightly different
327  * indirect node (each node's topology is independently indexed).
328  *
329  * HAMMER2_CITEM_FEMOD flags which elements can be modified by normal
330  * operations.  Typically this is only set on a quorum of MASTERs or
331  * on a SOFT_MASTER.  Also as a degenerate case on SUPROOT.  If a SOFT_MASTER
332  * is present, this bit is *not* set on a quorum of MASTERs.  The
333  * synchronization code ignores this bit, but all hammer2_cluster_*() calls
334  * that create/modify/delete elements use it.
335  *
336  * The chains making up the cluster may be narrowed down based on quorum
337  * acceptability, and if RESOLVE_RDONLY is specified the chains can be
338  * narrowed down to a single chain as long as the entire subtopology is known
339  * to be intact.  So, for example, we can narrow a read-only op to a single
340  * fast SLAVE but if we focus a CACHE chain we must still retain at least
341  * a SLAVE to ensure that the subtopology can be accessed.
342  *
343  * RESOLVE_RDONLY operations are effectively as-of so the quorum does not need
344  * to be maintained once the topology is validated as-of the top level of
345  * the operation.
346  *
347  * If a failure occurs the operation must be aborted by higher-level code and
348  * retried. XXX
349  */
350 void
351 hammer2_cluster_resolve(hammer2_cluster_t *cluster)
352 {
353         hammer2_chain_t *chain;
354         hammer2_chain_t *focus;
355         hammer2_pfs_t *pmp;
356         hammer2_tid_t quorum_tid;
357         hammer2_tid_t last_best_quorum_tid;
358         int focus_pfs_type;
359         uint32_t nflags;
360         int ttlmasters;
361         int ttlslaves;
362         int nmasters;
363         int nslaves;
364         int nquorum;
365         int smpresent;
366         int i;
367
368         cluster->error = 0;
369         cluster->focus = NULL;
370
371         focus_pfs_type = 0;
372         nflags = 0;
373         ttlmasters = 0;
374         ttlslaves = 0;
375         nmasters = 0;
376         nslaves = 0;
377
378         /*
379          * Calculate quorum
380          */
381         pmp = cluster->pmp;
382         KKASSERT(pmp != NULL || cluster->nchains == 0);
383         nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
384         smpresent = 0;
385
386         /*
387          * Pass 1
388          *
389          * NOTE: A NULL chain is not necessarily an error, it could be
390          *       e.g. a lookup failure or the end of an iteration.
391          *       Process normally.
392          */
393         for (i = 0; i < cluster->nchains; ++i) {
394                 chain = cluster->array[i].chain;
395                 if (chain && chain->error) {
396                         if (cluster->focus == NULL || cluster->focus == chain) {
397                                 /* error will be overridden by valid focus */
398                                 cluster->error = chain->error;
399                         }
400
401                         /*
402                          * Must count total masters and slaves whether the
403                          * chain is errored or not.
404                          */
405                         switch (cluster->pmp->pfs_types[i]) {
406                         case HAMMER2_PFSTYPE_MASTER:
407                                 ++ttlmasters;
408                                 break;
409                         case HAMMER2_PFSTYPE_SLAVE:
410                                 ++ttlslaves;
411                                 break;
412                         }
413                         continue;
414                 }
415                 switch (cluster->pmp->pfs_types[i]) {
416                 case HAMMER2_PFSTYPE_MASTER:
417                         ++ttlmasters;
418                         break;
419                 case HAMMER2_PFSTYPE_SLAVE:
420                         ++ttlslaves;
421                         break;
422                 case HAMMER2_PFSTYPE_SOFT_MASTER:
423                         nflags |= HAMMER2_CLUSTER_WRSOFT;
424                         nflags |= HAMMER2_CLUSTER_RDSOFT;
425                         smpresent = 1;
426                         break;
427                 case HAMMER2_PFSTYPE_SOFT_SLAVE:
428                         nflags |= HAMMER2_CLUSTER_RDSOFT;
429                         break;
430                 case HAMMER2_PFSTYPE_SUPROOT:
431                         /*
432                          * Degenerate cluster representing the super-root
433                          * topology on a single device.  Fake stuff so
434                          * cluster ops work as expected.
435                          */
436                         nflags |= HAMMER2_CLUSTER_WRHARD;
437                         nflags |= HAMMER2_CLUSTER_RDHARD;
438                         cluster->focus_index = i;
439                         cluster->focus = chain;
440                         cluster->error = chain ? chain->error : 0;
441                         break;
442                 default:
443                         break;
444                 }
445         }
446
447         /*
448          * Pass 2
449          *
450          * Resolve masters.  Calculate nmasters for the highest matching
451          * TID, if a quorum cannot be attained try the next lower matching
452          * TID until we exhaust TIDs.
453          *
454          * NOTE: A NULL chain is not necessarily an error, it could be
455          *       e.g. a lookup failure or the end of an iteration.
456          *       Process normally.
457          */
458         last_best_quorum_tid = HAMMER2_TID_MAX;
459         quorum_tid = 0;         /* fix gcc warning */
460
461         while (nmasters < nquorum && last_best_quorum_tid != 0) {
462                 nmasters = 0;
463                 quorum_tid = 0;
464
465                 for (i = 0; i < cluster->nchains; ++i) {
466                         if (cluster->pmp->pfs_types[i] !=
467                             HAMMER2_PFSTYPE_MASTER) {
468                                 continue;
469                         }
470                         chain = cluster->array[i].chain;
471
472                         if (cluster->array[i].flags & HAMMER2_CITEM_INVALID) {
473                                 /*
474                                  * Invalid as in unsynchronized, cannot be
475                                  * used to calculate the quorum.
476                                  */
477                         } else if (chain == NULL && quorum_tid == 0) {
478                                 /*
479                                  * NULL chain on master matches NULL chains
480                                  * on other masters.
481                                  */
482                                 ++nmasters;
483                         } else if (quorum_tid < last_best_quorum_tid &&
484                                    chain != NULL &&
485                                    (quorum_tid < chain->bref.modify_tid ||
486                                     nmasters == 0)) {
487                                 /*
488                                  * Better TID located, reset nmasters count.
489                                  */
490                                 nmasters = 1;
491                                 quorum_tid = chain->bref.modify_tid;
492                         } else if (chain &&
493                                    quorum_tid == chain->bref.modify_tid) {
494                                 /*
495                                  * TID matches current collection.
496                                  */
497                                 ++nmasters;
498                         }
499                 }
500                 if (nmasters >= nquorum)
501                         break;
502                 last_best_quorum_tid = quorum_tid;
503         }
504
505         /*
506          * Pass 3
507          *
508          * NOTE: A NULL chain is not necessarily an error, it could be
509          *       e.g. a lookup failure or the end of an iteration.
510          *       Process normally.
511          */
512         for (i = 0; i < cluster->nchains; ++i) {
513                 cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
514                 chain = cluster->array[i].chain;
515                 if (chain && chain->error) {
516                         if (cluster->focus == NULL || cluster->focus == chain) {
517                                 /* error will be overridden by valid focus */
518                                 cluster->error = chain->error;
519                         }
520                         continue;
521                 }
522
523                 switch (cluster->pmp->pfs_types[i]) {
524                 case HAMMER2_PFSTYPE_MASTER:
525                         /*
526                          * We must have enough up-to-date masters to reach
527                          * a quorum and the master modify_tid must match
528                          * the quorum's modify_tid.
529                          *
530                          * Do not select an errored or out-of-sync master.
531                          */
532                         if (cluster->array[i].flags & HAMMER2_CITEM_INVALID) {
533                                 nflags |= HAMMER2_CLUSTER_UNHARD;
534                         } else if (nmasters >= nquorum &&
535                                    (chain == NULL || chain->error == 0) &&
536                                    ((chain == NULL && quorum_tid == 0) ||
537                                     (chain != NULL && quorum_tid ==
538                                                   chain->bref.modify_tid))) {
539                                 nflags |= HAMMER2_CLUSTER_WRHARD;
540                                 nflags |= HAMMER2_CLUSTER_RDHARD;
541                                 if (!smpresent) {
542                                         cluster->array[i].flags |=
543                                                         HAMMER2_CITEM_FEMOD;
544                                 }
545                                 if (cluster->focus == NULL ||
546                                     focus_pfs_type == HAMMER2_PFSTYPE_SLAVE) {
547                                         focus_pfs_type = HAMMER2_PFSTYPE_MASTER;
548                                         cluster->focus_index = i;
549                                         cluster->focus = chain; /* NULL ok */
550                                         cluster->error = chain ? chain->error :
551                                                                  0;
552                                 }
553                         } else if (chain == NULL || chain->error == 0) {
554                                 nflags |= HAMMER2_CLUSTER_UNHARD;
555                         }
556                         break;
557                 case HAMMER2_PFSTYPE_SLAVE:
558                         /*
559                          * We must have enough up-to-date masters to reach
560                          * a quorum and the slave modify_tid must match the
561                          * quorum's modify_tid.
562                          *
563                          * Do not select an errored slave.
564                          */
565                         if (cluster->array[i].flags & HAMMER2_CITEM_INVALID) {
566                                 nflags |= HAMMER2_CLUSTER_UNHARD;
567                         } else if (nmasters >= nquorum &&
568                                    (chain == NULL || chain->error == 0) &&
569                                    ((chain == NULL && quorum_tid == 0) ||
570                                     (chain && quorum_tid ==
571                                               chain->bref.modify_tid))) {
572                                 ++nslaves;
573                                 nflags |= HAMMER2_CLUSTER_RDHARD;
574 #if 0
575                                 /* XXX optimize for RESOLVE_RDONLY */
576                                 if (cluster->focus == NULL) {
577                                         focus_pfs_type = HAMMER2_PFSTYPE_SLAVE;
578                                         cluster->focus_index = i;
579                                         cluster->focus = chain; /* NULL ok */
580                                         cluster->error = chain ? chain->error :
581                                                                  0;
582                                 }
583 #endif
584                         } else if (chain == NULL || chain->error == 0) {
585                                 nflags |= HAMMER2_CLUSTER_UNSOFT;
586                         }
587                         break;
588                 case HAMMER2_PFSTYPE_SOFT_MASTER:
589                         /*
590                          * Directly mounted soft master always wins.  There
591                          * should be only one.
592                          */
593                         KKASSERT(focus_pfs_type != HAMMER2_PFSTYPE_SOFT_MASTER);
594                         cluster->focus_index = i;
595                         cluster->focus = chain;
596                         cluster->error = chain ? chain->error : 0;
597                         focus_pfs_type = HAMMER2_PFSTYPE_SOFT_MASTER;
598                         cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
599                         break;
600                 case HAMMER2_PFSTYPE_SOFT_SLAVE:
601                         /*
602                          * Directly mounted soft slave always wins.  There
603                          * should be only one.
604                          */
605                         KKASSERT(focus_pfs_type != HAMMER2_PFSTYPE_SOFT_SLAVE);
606                         if (focus_pfs_type != HAMMER2_PFSTYPE_SOFT_MASTER) {
607                                 cluster->focus_index = i;
608                                 cluster->focus = chain;
609                                 cluster->error = chain ? chain->error : 0;
610                                 focus_pfs_type = HAMMER2_PFSTYPE_SOFT_SLAVE;
611                         }
612                         break;
613                 case HAMMER2_PFSTYPE_SUPROOT:
614                         /*
615                          * spmp (degenerate case)
616                          */
617                         KKASSERT(i == 0);
618                         cluster->focus_index = i;
619                         cluster->focus = chain;
620                         cluster->error = chain ? chain->error : 0;
621                         focus_pfs_type = HAMMER2_PFSTYPE_SUPROOT;
622                         cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
623                         break;
624                 default:
625                         break;
626                 }
627         }
628
629         /*
630          * Focus now set, adjust ddflag.  Skip this pass if the focus
631          * is bad or if we are at the PFS root (the bref won't match at
632          * the PFS root, obviously).
633          */
634         focus = cluster->focus;
635         if (focus) {
636                 cluster->ddflag =
637                         (cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
638         } else {
639                 cluster->ddflag = 0;
640                 goto skip4;
641         }
642         if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
643                 goto skip4;
644
645         /*
646          * Pass 4
647          *
648          * Validate the elements that were not marked invalid.  They should
649          * match.
650          */
651         for (i = 0; i < cluster->nchains; ++i) {
652                 int ddflag;
653
654                 chain = cluster->array[i].chain;
655
656                 if (chain == NULL)
657                         continue;
658                 if (chain == focus)
659                         continue;
660                 if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
661                         continue;
662
663                 ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
664                 if (chain->bref.type != focus->bref.type ||
665                     chain->bref.key != focus->bref.key ||
666                     chain->bref.keybits != focus->bref.keybits ||
667                     chain->bref.modify_tid != focus->bref.modify_tid ||
668                     chain->bytes != focus->bytes ||
669                     ddflag != cluster->ddflag) {
670                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
671                         if (hammer2_debug & 1)
672                         kprintf("cluster_resolve: matching modify_tid failed "
673                                 "bref test: idx=%d type=%02x/%02x "
674                                 "key=%016jx/%d-%016jx/%d "
675                                 "mod=%016jx/%016jx bytes=%u/%u\n",
676                                 i,
677                                 chain->bref.type, focus->bref.type,
678                                 chain->bref.key, chain->bref.keybits,
679                                 focus->bref.key, focus->bref.keybits,
680                                 chain->bref.modify_tid, focus->bref.modify_tid,
681                                 chain->bytes, focus->bytes);
682                         if (hammer2_debug & 0x4000)
683                                 panic("cluster_resolve");
684                         /* flag issue and force resync? */
685                 }
686         }
687 skip4:
688
689         if (ttlslaves == 0)
690                 nflags |= HAMMER2_CLUSTER_NOSOFT;
691         if (ttlmasters == 0)
692                 nflags |= HAMMER2_CLUSTER_NOHARD;
693
694         /*
695          * Set SSYNCED or MSYNCED for slaves and masters respectively if
696          * all available nodes (even if 0 are available) are fully
697          * synchronized.  This is used by the synchronization thread to
698          * determine if there is work it could potentially accomplish.
699          */
700         if (nslaves == ttlslaves)
701                 nflags |= HAMMER2_CLUSTER_SSYNCED;
702         if (nmasters == ttlmasters)
703                 nflags |= HAMMER2_CLUSTER_MSYNCED;
704
705         /*
706          * Determine if the cluster was successfully locked for the
707          * requested operation and generate an error code.  The cluster
708          * will not be locked (or ref'd) if an error is returned.
709          *
710          * Caller can use hammer2_cluster_rdok() and hammer2_cluster_wrok()
711          * to determine if reading or writing is possible.  If writing, the
712          * cluster still requires a call to hammer2_cluster_modify() first.
713          */
714         atomic_set_int(&cluster->flags, nflags);
715         atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);
716 }
717
718 /*
719  * This is used by the XOPS subsystem to calculate the state of
720  * the collection and tell hammer2_xop_collect() what to do with it.
721  * The collection can be in various states of desynchronization, the
722  * caller specifically wants to resolve the passed-in key.
723  *
724  * Return values:
725  *      0               - Quorum agreement, key is valid
726  *
727  *      ENOENT          - Quorum agreement, end of scan
728  *
729  *      ESRCH           - Quorum agreement, key is INVALID (caller should
730  *                        skip key).
731  *
732  *      EIO             - Quorum agreement but all elements had errors.
733  *
734  *      EDEADLK         - No quorum agreement possible for key, a repair
735  *                        may be needed.  Caller has to decide what to do,
736  *                        possibly iterating the key or generating an EIO.
737  *
738  *      EINPROGRESS     - No quorum agreement yet, but agreement is still
739  *                        possible if caller waits for more responses.  Caller
740  *                        should not iterate key.
741  *
742  * XXX needs to handle SOFT_MASTER and SOFT_SLAVE
743  */
744 int
745 hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
746 {
747         hammer2_chain_t *chain;
748         hammer2_chain_t *focus;
749         hammer2_pfs_t *pmp;
750         hammer2_tid_t quorum_tid;
751         hammer2_tid_t last_best_quorum_tid;
752         uint32_t nflags;
753         int ttlmasters;
754         int ttlslaves;
755         int nmasters;
756         int nmasters_keymatch;
757         int nslaves;
758         int nquorum;
759         int umasters;   /* unknown masters (still in progress) */
760         int smpresent;
761         int i;
762
763         cluster->error = 0;
764         cluster->focus = NULL;
765
766         nflags = 0;
767         ttlmasters = 0;
768         ttlslaves = 0;
769         nmasters = 0;
770         nmasters_keymatch = 0;
771         umasters = 0;
772         nslaves = 0;
773
774         /*
775          * Calculate quorum
776          */
777         pmp = cluster->pmp;
778         KKASSERT(pmp != NULL || cluster->nchains == 0);
779         nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
780         smpresent = 0;
781
782         /*
783          * Pass 1
784          *
785          * NOTE: A NULL chain is not necessarily an error, it could be
786          *       e.g. a lookup failure or the end of an iteration.
787          *       Process normally.
788          */
789         for (i = 0; i < cluster->nchains; ++i) {
790                 cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
791                 cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
792
793                 chain = cluster->array[i].chain;
794                 if (chain && chain->error) {
795                         if (cluster->focus == NULL || cluster->focus == chain) {
796                                 /* error will be overridden by valid focus */
797                                 cluster->error = chain->error;
798                         }
799
800                         /*
801                          * Must count total masters and slaves whether the
802                          * chain is errored or not.
803                          */
804                         switch (cluster->pmp->pfs_types[i]) {
805                         case HAMMER2_PFSTYPE_MASTER:
806                                 ++ttlmasters;
807                                 break;
808                         case HAMMER2_PFSTYPE_SLAVE:
809                                 ++ttlslaves;
810                                 break;
811                         }
812                         continue;
813                 }
814                 switch (cluster->pmp->pfs_types[i]) {
815                 case HAMMER2_PFSTYPE_MASTER:
816                         ++ttlmasters;
817                         break;
818                 case HAMMER2_PFSTYPE_SLAVE:
819                         ++ttlslaves;
820                         break;
821                 case HAMMER2_PFSTYPE_SOFT_MASTER:
822                         nflags |= HAMMER2_CLUSTER_WRSOFT;
823                         nflags |= HAMMER2_CLUSTER_RDSOFT;
824                         smpresent = 1;
825                         break;
826                 case HAMMER2_PFSTYPE_SOFT_SLAVE:
827                         nflags |= HAMMER2_CLUSTER_RDSOFT;
828                         break;
829                 case HAMMER2_PFSTYPE_SUPROOT:
830                         /*
831                          * Degenerate cluster representing the super-root
832                          * topology on a single device.  Fake stuff so
833                          * cluster ops work as expected.
834                          */
835                         nflags |= HAMMER2_CLUSTER_WRHARD;
836                         nflags |= HAMMER2_CLUSTER_RDHARD;
837                         cluster->focus_index = i;
838                         cluster->focus = chain;
839                         cluster->error = chain ? chain->error : 0;
840                         break;
841                 default:
842                         break;
843                 }
844         }
845
846         /*
847          * Pass 2
848          *
849          * Resolve nmasters             - master nodes fully match
850          *
851          * Resolve umasters             - master nodes operation still
852          *                                in progress
853          *
854          * Resolve nmasters_keymatch    - master nodes match the passed-in
855          *                                key and may or may not match
856          *                                the quorum-agreed tid.
857          * 
858          * The quorum-agreed TID is the highest matching TID.
859          */
860         last_best_quorum_tid = HAMMER2_TID_MAX;
861         quorum_tid = 0;         /* fix gcc warning */
862
863         while (nmasters < nquorum && last_best_quorum_tid != 0) {
864                 nmasters = 0;
865                 quorum_tid = 0;
866
867                 for (i = 0; i < cluster->nchains; ++i) {
868                         /* XXX SOFT smpresent handling */
869                         if (cluster->pmp->pfs_types[i] !=
870                             HAMMER2_PFSTYPE_MASTER) {
871                                 continue;
872                         }
873
874                         chain = cluster->array[i].chain;
875
876                         /*
877                          * Skip elements still in progress.  umasters keeps
878                          * track of masters that might still be in-progress.
879                          */
880                         if (chain == NULL && (cluster->array[i].flags &
881                                               HAMMER2_CITEM_NULL) == 0) {
882                                 ++umasters;
883                                 continue;
884                         }
885
886                         /*
887                          * Key match?
888                          */
889                         if (flags & HAMMER2_CHECK_NULL) {
890                                 if (chain == NULL) {
891                                         ++nmasters;
892                                         ++nmasters_keymatch;
893                                 }
894                         } else if (chain && chain->bref.key == key) {
895                                 ++nmasters_keymatch;
896                                 if (quorum_tid < last_best_quorum_tid &&
897                                     (quorum_tid < chain->bref.modify_tid ||
898                                      nmasters == 0)) {
899                                         /*
900                                          * Better TID located, reset
901                                          * nmasters count.
902                                          */
903                                         nmasters = 0;
904                                         quorum_tid = chain->bref.modify_tid;
905                                 }
906                                 if (quorum_tid == chain->bref.modify_tid) {
907                                         /*
908                                          * TID matches current collection.
909                                          */
910                                         ++nmasters;
911                                         if (chain->error == 0) {
912                                                 cluster->focus = chain;
913                                                 cluster->focus_index = i;
914                                         }
915                                 }
916                         }
917                 }
918                 if (nmasters >= nquorum)
919                         break;
920                 last_best_quorum_tid = quorum_tid;
921         }
922
923         /*
924         kprintf("nmasters %d/%d nmaster_keymatch=%d umasters=%d\n",
925                 nmasters, nquorum, nmasters_keymatch, umasters);
926         */
927
928         /*
929          * Early return if we do not have enough masters.
930          */
931         if (nmasters < nquorum) {
932                 if (nmasters + umasters >= nquorum)
933                         return EINPROGRESS;
934                 if (nmasters_keymatch < nquorum) 
935                         return ESRCH;
936                 return EDEADLK;
937         }
938
939         /*
940          * Validated end of scan.
941          */
942         if (flags & HAMMER2_CHECK_NULL)
943                 return ENOENT;
944
945         /*
946          * If we have a NULL focus at this point the agreeing quorum all
947          * had chain errors.
948          */
949         if (cluster->focus == NULL)
950                 return EIO;
951
952         /*
953          * Pass 3
954          *
955          * We have quorum agreement, validate elements, not end of scan.
956          */
957         for (i = 0; i < cluster->nchains; ++i) {
958                 chain = cluster->array[i].chain;
959                 if (chain == NULL ||
960                     chain->bref.key != key ||
961                     chain->bref.modify_tid != quorum_tid) {
962                         continue;
963                 }
964
965                 switch (cluster->pmp->pfs_types[i]) {
966                 case HAMMER2_PFSTYPE_MASTER:
967                         cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
968                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
969                         nflags |= HAMMER2_CLUSTER_WRHARD;
970                         nflags |= HAMMER2_CLUSTER_RDHARD;
971                         break;
972                 case HAMMER2_PFSTYPE_SLAVE:
973                         /*
974                          * We must have enough up-to-date masters to reach
975                          * a quorum and the slave modify_tid must match the
976                          * quorum's modify_tid.
977                          *
978                          * Do not select an errored slave.
979                          */
980                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
981                         nflags |= HAMMER2_CLUSTER_RDHARD;
982                         ++nslaves;
983                         break;
984                 case HAMMER2_PFSTYPE_SOFT_MASTER:
985                         /*
986                          * Directly mounted soft master always wins.  There
987                          * should be only one.
988                          */
989                         cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
990                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
991                         break;
992                 case HAMMER2_PFSTYPE_SOFT_SLAVE:
993                         /*
994                          * Directly mounted soft slave always wins.  There
995                          * should be only one.
996                          *
997                          * XXX
998                          */
999                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
1000                         break;
1001                 case HAMMER2_PFSTYPE_SUPROOT:
1002                         /*
1003                          * spmp (degenerate case)
1004                          */
1005                         cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
1006                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
1007                         break;
1008                 default:
1009                         break;
1010                 }
1011         }
1012
1013         /*
1014          * Focus now set, adjust ddflag.  Skip this pass if the focus
1015          * is bad or if we are at the PFS root (the bref won't match at
1016          * the PFS root, obviously).
1017          */
1018         focus = cluster->focus;
1019         if (focus) {
1020                 cluster->ddflag =
1021                         (cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
1022         } else {
1023                 cluster->ddflag = 0;
1024                 goto skip4;
1025         }
1026         if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
1027                 goto skip4;
1028
1029         /*
1030          * Pass 4
1031          *
1032          * Validate the elements that were not marked invalid.  They should
1033          * match.
1034          */
1035         for (i = 0; i < cluster->nchains; ++i) {
1036                 int ddflag;
1037
1038                 chain = cluster->array[i].chain;
1039
1040                 if (chain == NULL)
1041                         continue;
1042                 if (chain == focus)
1043                         continue;
1044                 if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
1045                         continue;
1046
1047                 ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1048                 if (chain->bref.type != focus->bref.type ||
1049                     chain->bref.key != focus->bref.key ||
1050                     chain->bref.keybits != focus->bref.keybits ||
1051                     chain->bref.modify_tid != focus->bref.modify_tid ||
1052                     chain->bytes != focus->bytes ||
1053                     ddflag != cluster->ddflag) {
1054                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
1055                         if (hammer2_debug & 1)
1056                         kprintf("cluster_resolve: matching modify_tid failed "
1057                                 "bref test: idx=%d type=%02x/%02x "
1058                                 "key=%016jx/%d-%016jx/%d "
1059                                 "mod=%016jx/%016jx bytes=%u/%u\n",
1060                                 i,
1061                                 chain->bref.type, focus->bref.type,
1062                                 chain->bref.key, chain->bref.keybits,
1063                                 focus->bref.key, focus->bref.keybits,
1064                                 chain->bref.modify_tid, focus->bref.modify_tid,
1065                                 chain->bytes, focus->bytes);
1066                         if (hammer2_debug & 0x4000)
1067                                 panic("cluster_resolve");
1068                         /* flag issue and force resync? */
1069                 }
1070         }
1071 skip4:
1072
1073         if (ttlslaves == 0)
1074                 nflags |= HAMMER2_CLUSTER_NOSOFT;
1075         if (ttlmasters == 0)
1076                 nflags |= HAMMER2_CLUSTER_NOHARD;
1077
1078         /*
1079          * Set SSYNCED or MSYNCED for slaves and masters respectively if
1080          * all available nodes (even if 0 are available) are fully
1081          * synchronized.  This is used by the synchronization thread to
1082          * determine if there is work it could potentially accomplish.
1083          */
1084         if (nslaves == ttlslaves)
1085                 nflags |= HAMMER2_CLUSTER_SSYNCED;
1086         if (nmasters == ttlmasters)
1087                 nflags |= HAMMER2_CLUSTER_MSYNCED;
1088
1089         /*
1090          * Determine if the cluster was successfully locked for the
1091          * requested operation and generate an error code.  The cluster
1092          * will not be locked (or ref'd) if an error is returned.
1093          *
1094          * Caller can use hammer2_cluster_rdok() and hammer2_cluster_wrok()
1095          * to determine if reading or writing is possible.  If writing, the
1096          * cluster still requires a call to hammer2_cluster_modify() first.
1097          */
1098         atomic_set_int(&cluster->flags, nflags);
1099         atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);
1100
1101         return 0;
1102 }
1103
1104 /*
1105  * This is used by the sync thread to force non-NULL elements of a copy
1106  * of the pmp->iroot cluster to be good which is required to prime the
1107  * sync.
1108  */
1109 void
1110 hammer2_cluster_forcegood(hammer2_cluster_t *cluster)
1111 {
1112         int i;
1113
1114         for (i = 0; i < cluster->nchains; ++i) {
1115                 if (cluster->array[i].chain)
1116                         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
1117         }
1118 }
1119
1120 /*
1121  * Copy a cluster, returned a ref'd cluster.  All underlying chains
1122  * are also ref'd, but not locked.  Focus state is also copied.
1123  *
1124  * Original cluster does not have to be locked but usually is.
1125  * New cluster will not be flagged as locked.
1126  *
1127  * Callers using this function to initialize a new cluster from an inode
1128  * generally lock and resolve the resulting cluster.
1129  *
1130  * Callers which use this function to save/restore a cluster structure
1131  * generally retain the focus state and do not re-resolve it.  Caller should
1132  * not try to re-resolve internal (cparent) node state during an iteration
1133  * as the individual tracking elements of cparent in an iteration may not
1134  * match even though they are correct.
1135  */
1136 hammer2_cluster_t *
1137 hammer2_cluster_copy(hammer2_cluster_t *ocluster)
1138 {
1139         hammer2_pfs_t *pmp = ocluster->pmp;
1140         hammer2_cluster_t *ncluster;
1141         hammer2_chain_t *chain;
1142         int i;
1143
1144         ncluster = kmalloc(sizeof(*ncluster), M_HAMMER2, M_WAITOK | M_ZERO);
1145         ncluster->pmp = pmp;
1146         ncluster->nchains = ocluster->nchains;
1147         ncluster->refs = 1;
1148
1149         for (i = 0; i < ocluster->nchains; ++i) {
1150                 chain = ocluster->array[i].chain;
1151                 ncluster->array[i].chain = chain;
1152                 ncluster->array[i].flags = ocluster->array[i].flags;
1153                 if (chain)
1154                         hammer2_chain_ref(chain);
1155         }
1156         ncluster->focus_index = ocluster->focus_index;
1157         ncluster->focus = ocluster->focus;
1158         ncluster->flags = ocluster->flags & ~(HAMMER2_CLUSTER_LOCKED |
1159                                               HAMMER2_CLUSTER_INODE);
1160
1161         return (ncluster);
1162 }
1163
1164 /*
1165  * Unlock a cluster.  Refcount and focus is maintained.
1166  */
1167 void
1168 hammer2_cluster_unlock_except(hammer2_cluster_t *cluster, int idx)
1169 {
1170         hammer2_chain_t *chain;
1171         int i;
1172
1173         if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) {
1174                 kprintf("hammer2_cluster_unlock: cluster %p not locked\n",
1175                         cluster);
1176         }
1177         KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
1178         KKASSERT(cluster->refs > 0);
1179         atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
1180
1181         for (i = 0; i < cluster->nchains; ++i) {
1182                 if (i == idx)
1183                         continue;
1184                 chain = cluster->array[i].chain;
1185                 if (chain)
1186                         hammer2_chain_unlock(chain);
1187         }
1188 }
1189
1190 void
1191 hammer2_cluster_unlock(hammer2_cluster_t *cluster)
1192 {
1193         hammer2_cluster_unlock_except(cluster, -1);
1194 }
1195
1196 /*
1197  * Set an inode's cluster modified, marking the related chains RW and
1198  * duplicating them if necessary.
1199  *
1200  * The passed-in chain is a localized copy of the chain previously acquired
1201  * when the inode was locked (and possilby replaced in the mean time), and
1202  * must also be updated.  In fact, we update it first and then synchronize
1203  * the inode's cluster cache.
1204  */
1205 hammer2_inode_data_t *
1206 hammer2_cluster_modify_ip(hammer2_inode_t *ip,
1207                           hammer2_cluster_t *cluster, int flags)
1208 {
1209         hammer2_inode_modify(ip);
1210         hammer2_cluster_modify(cluster, flags);
1211         hammer2_inode_repoint(ip, NULL, cluster);
1212         return (&hammer2_cluster_wdata(cluster)->ipdata);
1213 }
1214
1215 /*
1216  * Adjust the cluster's chains to allow modification and adjust the
1217  * focus.  Data will be accessible on return.
1218  *
1219  * If our focused master errors on modify, re-resolve the cluster to
1220  * try to select a different master.
1221  */
1222 void
1223 hammer2_cluster_modify(hammer2_cluster_t *cluster, int flags)
1224 {
1225         hammer2_chain_t *chain;
1226         int resolve_again;
1227         int i;
1228
1229         resolve_again = 0;
1230         for (i = 0; i < cluster->nchains; ++i) {
1231                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0) {
1232                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
1233                         continue;
1234                 }
1235                 chain = cluster->array[i].chain;
1236                 if (chain == NULL)
1237                         continue;
1238                 if (chain->error)
1239                         continue;
1240                 hammer2_chain_modify(chain, flags);
1241                 if (cluster->focus == chain && chain->error) {
1242                         cluster->error = chain->error;
1243                         resolve_again = 1;
1244                 }
1245         }
1246         if (resolve_again)
1247                 hammer2_cluster_resolve(cluster);
1248 }
1249
1250 /*
1251  * Synchronize modifications from the focus to other chains in a cluster.
1252  * Convenient because nominal API users can just modify the contents of the
1253  * focus (at least for non-blockref data).
1254  *
1255  * Nominal front-end operations only edit non-block-table data in a single
1256  * chain.  This code copies such modifications to the other chains in the
1257  * cluster.  Blocktable modifications are handled on a chain-by-chain basis
1258  * by both the frontend and the backend and will explode in fireworks if
1259  * blindly copied.
1260  */
1261 void
1262 hammer2_cluster_modsync(hammer2_cluster_t *cluster)
1263 {
1264         hammer2_chain_t *focus;
1265         hammer2_chain_t *scan;
1266         const hammer2_inode_data_t *ripdata;
1267         hammer2_inode_data_t *wipdata;
1268         int i;
1269
1270         focus = cluster->focus;
1271         KKASSERT(focus->flags & HAMMER2_CHAIN_MODIFIED);
1272
1273         for (i = 0; i < cluster->nchains; ++i) {
1274                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
1275                         continue;
1276                 scan = cluster->array[i].chain;
1277                 if (scan == NULL || scan == focus)
1278                         continue;
1279                 if (scan->error)
1280                         continue;
1281                 KKASSERT(scan->flags & HAMMER2_CHAIN_MODIFIED);
1282                 KKASSERT(focus->bytes == scan->bytes &&
1283                          focus->bref.type == scan->bref.type);
1284                 switch(focus->bref.type) {
1285                 case HAMMER2_BREF_TYPE_INODE:
1286                         ripdata = &focus->data->ipdata;
1287                         wipdata = &scan->data->ipdata;
1288                         if ((ripdata->meta.op_flags &
1289                             HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1290                                 bcopy(ripdata, wipdata,
1291                                       offsetof(hammer2_inode_data_t, u));
1292                                 break;
1293                         }
1294                         /* fall through to full copy */
1295                 case HAMMER2_BREF_TYPE_DATA:
1296                         bcopy(focus->data, scan->data, focus->bytes);
1297                         break;
1298                 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1299                 case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
1300                 case HAMMER2_BREF_TYPE_FREEMAP:
1301                 case HAMMER2_BREF_TYPE_VOLUME:
1302                         panic("hammer2_cluster_modsync: illegal node type");
1303                         /* NOT REACHED */
1304                         break;
1305                 default:
1306                         panic("hammer2_cluster_modsync: unknown node type");
1307                         break;
1308                 }
1309         }
1310 }
1311
1312 /*
1313  * Lookup initialization/completion API.  Returns a locked, fully resolved
1314  * cluster with one ref.
1315  */
1316 hammer2_cluster_t *
1317 hammer2_cluster_lookup_init(hammer2_cluster_t *cparent, int flags)
1318 {
1319         hammer2_cluster_t *cluster;
1320
1321         cluster = hammer2_cluster_copy(cparent);
1322         if (flags & HAMMER2_LOOKUP_SHARED) {
1323                 hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS |
1324                                               HAMMER2_RESOLVE_SHARED);
1325         } else {
1326                 hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS);
1327         }
1328         hammer2_cluster_resolve(cluster);
1329
1330         return (cluster);
1331 }
1332
1333 void
1334 hammer2_cluster_lookup_done(hammer2_cluster_t *cparent)
1335 {
1336         if (cparent) {
1337                 hammer2_cluster_unlock(cparent);
1338                 hammer2_cluster_drop(cparent);
1339         }
1340 }
1341
1342 /*
1343  * Locate first match or overlap under parent, return a new, locked, resolved
1344  * cluster with one ref.
1345  *
1346  * Must never be called with HAMMER2_LOOKUP_MATCHIND.
1347  */
1348 hammer2_cluster_t *
1349 hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp,
1350                      hammer2_key_t key_beg, hammer2_key_t key_end, int flags)
1351 {
1352         hammer2_pfs_t *pmp;
1353         hammer2_cluster_t *cluster;
1354         hammer2_chain_t *chain;
1355         hammer2_key_t key_accum;
1356         hammer2_key_t key_next;
1357         int null_count;
1358         int rflags;
1359         int i;
1360
1361         KKASSERT((flags & HAMMER2_LOOKUP_MATCHIND) == 0);
1362
1363         pmp = cparent->pmp;                             /* can be NULL */
1364         key_accum = *key_nextp;
1365         null_count = 0;
1366         if (flags & HAMMER2_LOOKUP_SHARED)
1367                 rflags = HAMMER2_RESOLVE_SHARED;
1368         else
1369                 rflags = 0;
1370
1371         cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
1372         cluster->pmp = pmp;                             /* can be NULL */
1373         cluster->refs = 1;
1374         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
1375                 cluster->flags |= HAMMER2_CLUSTER_LOCKED;
1376
1377         /*
1378          * Iterating earlier cluster elements with later elements still
1379          * locked is a problem, so we have to unlock the parent and then
1380          * re-lock as we go.
1381          */
1382         hammer2_cluster_unlock(cparent);
1383         cparent->flags |= HAMMER2_CLUSTER_LOCKED;
1384
1385         /*
1386          * Pass-1, issue lookups.
1387          */
1388         for (i = 0; i < cparent->nchains; ++i) {
1389                 cluster->array[i].flags = cparent->array[i].flags;
1390                 key_next = *key_nextp;
1391
1392                 /*
1393                  * Always relock the parent as we go.
1394                  */
1395                 if (cparent->array[i].chain) {
1396                         hammer2_chain_lock(cparent->array[i].chain, rflags);
1397                 }
1398
1399                 /*
1400                  * Nothing to base the lookup, or parent was not synchronized.
1401                  */
1402                 if (cparent->array[i].chain == NULL ||
1403                     (cparent->array[i].flags & HAMMER2_CITEM_INVALID)) {
1404                         ++null_count;
1405                         continue;
1406                 }
1407
1408                 chain = hammer2_chain_lookup(&cparent->array[i].chain,
1409                                              &key_next,
1410                                              key_beg, key_end,
1411                                              &cparent->array[i].cache_index,
1412                                              flags);
1413                 cluster->array[i].chain = chain;
1414                 if (chain == NULL) {
1415                         ++null_count;
1416                 }
1417                 if (key_accum > key_next)
1418                         key_accum = key_next;
1419         }
1420
1421         /*
1422          * Cleanup
1423          */
1424         cluster->nchains = i;
1425         *key_nextp = key_accum;
1426
1427         /*
1428          * The cluster must be resolved, out of sync elements may be present.
1429          *
1430          * If HAMMER2_LOOKUP_ALLNODES is not set focus must be non-NULL.
1431          */
1432         if (null_count != i)
1433                 hammer2_cluster_resolve(cluster);
1434         if (null_count == i ||
1435             (cluster->focus == NULL &&
1436              (flags & HAMMER2_LOOKUP_ALLNODES) == 0)) {
1437                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
1438                         hammer2_cluster_unlock(cluster);
1439                 hammer2_cluster_drop(cluster);
1440                 cluster = NULL;
1441         }
1442
1443         return (cluster);
1444 }
1445
1446 /*
1447  * Locate next match or overlap under parent, replace the passed-in cluster.
1448  * The returned cluster is a new, locked, resolved cluster with one ref.
1449  *
1450  * Must never be called with HAMMER2_LOOKUP_MATCHIND.
1451  */
1452 hammer2_cluster_t *
1453 hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
1454                      hammer2_key_t *key_nextp,
1455                      hammer2_key_t key_beg, hammer2_key_t key_end, int flags)
1456 {
1457         hammer2_chain_t *ochain;
1458         hammer2_chain_t *nchain;
1459         hammer2_key_t key_accum;
1460         hammer2_key_t key_next;
1461         int parent_index;
1462         int cluster_index;
1463         int null_count;
1464         int rflags;
1465         int i;
1466
1467         KKASSERT((flags & HAMMER2_LOOKUP_MATCHIND) == 0);
1468
1469         key_accum = *key_nextp;
1470         null_count = 0;
1471         parent_index = cparent->focus_index;    /* save prior focus */
1472         cluster_index = cluster->focus_index;
1473         if (flags & HAMMER2_LOOKUP_SHARED)
1474                 rflags = HAMMER2_RESOLVE_SHARED;
1475         else
1476                 rflags = 0;
1477
1478         cluster->focus = NULL;          /* XXX needed any more? */
1479         /*cparent->focus = NULL;*/
1480         cluster->focus_index = 0;       /* XXX needed any more? */
1481         /*cparent->focus_index = 0;*/
1482
1483         cluster->ddflag = 0;
1484
1485         /*
1486          * The parent is always locked on entry, the iterator may be locked
1487          * depending on flags.
1488          *
1489          * We must temporarily unlock the passed-in clusters to avoid a
1490          * deadlock between elements of the cluster with other threads.
1491          * We will fixup the lock in the loop.
1492          *
1493          * Note that this will clear the focus.
1494          *
1495          * Reflag the clusters as locked, because we will relock them
1496          * as we go.
1497          */
1498         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) {
1499                 hammer2_cluster_unlock(cluster);
1500                 cluster->flags |= HAMMER2_CLUSTER_LOCKED;
1501         }
1502         hammer2_cluster_unlock(cparent);
1503         cparent->flags |= HAMMER2_CLUSTER_LOCKED;
1504
1505         for (i = 0; i < cparent->nchains; ++i) {
1506                 key_next = *key_nextp;
1507                 ochain = cluster->array[i].chain;
1508
1509                 /*
1510                  * Always relock the parent as we go.
1511                  */
1512                 if (cparent->array[i].chain)
1513                         hammer2_chain_lock(cparent->array[i].chain, rflags);
1514
1515                 /*
1516                  * Nothing to iterate from.  These cases can occur under
1517                  * normal operations.  For example, during synchronization
1518                  * a slave might reach the end of its scan while records
1519                  * are still left on the master(s).
1520                  */
1521                 if (ochain == NULL) {
1522                         ++null_count;
1523                         continue;
1524                 }
1525                 if (cparent->array[i].chain == NULL ||
1526                     (cparent->array[i].flags & HAMMER2_CITEM_INVALID) ||
1527                     (cluster->array[i].flags & HAMMER2_CITEM_INVALID)) {
1528                         /* ochain has not yet been relocked */
1529                         hammer2_chain_drop(ochain);
1530                         cluster->array[i].chain = NULL;
1531                         ++null_count;
1532                         continue;
1533                 }
1534
1535                 /*
1536                  * Relock the child if necessary.  Parent and child will then
1537                  * be locked as expected by hammer2_chain_next() and flags.
1538                  */
1539                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
1540                         hammer2_chain_lock(ochain, rflags);
1541                 nchain = hammer2_chain_next(&cparent->array[i].chain, ochain,
1542                                             &key_next, key_beg, key_end,
1543                                             &cparent->array[i].cache_index,
1544                                             flags);
1545                 /* ochain now invalid but can still be used for focus check */
1546                 if (parent_index == i) {
1547                         cparent->focus_index = i;
1548                         cparent->focus = cparent->array[i].chain;
1549                 }
1550
1551                 cluster->array[i].chain = nchain;
1552                 if (nchain == NULL) {
1553                         ++null_count;
1554                 }
1555                 if (key_accum > key_next)
1556                         key_accum = key_next;
1557         }
1558
1559         /*
1560          * Cleanup
1561          */
1562         cluster->nchains = i;
1563         *key_nextp = key_accum;
1564
1565         /*
1566          * The cluster must be resolved, out of sync elements may be present.
1567          *
1568          * If HAMMER2_LOOKUP_ALLNODES is not set focus must be non-NULL.
1569          */
1570         if (null_count != i)
1571                 hammer2_cluster_resolve(cluster);
1572         if (null_count == i ||
1573             (cluster->focus == NULL &&
1574              (flags & HAMMER2_LOOKUP_ALLNODES) == 0)) {
1575                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
1576                         hammer2_cluster_unlock(cluster);
1577                 hammer2_cluster_drop(cluster);
1578                 cluster = NULL;
1579         }
1580         return(cluster);
1581 }
1582
1583 /*
1584  * Advance just one chain in the cluster and recalculate the invalid bit.
1585  * The cluster index is allowed to be flagged invalid on input and is
1586  * recalculated on return.
1587  *
1588  * (used during synchronization to advance past a chain being deleted).
1589  *
1590  * The chain being advanced must not be the focus and the clusters in
1591  * question must have already passed normal cluster_lookup/cluster_next
1592  * checks.
1593  *
1594  * The cluster always remains intact on return, so void function.
1595  */
1596 void
1597 hammer2_cluster_next_single_chain(hammer2_cluster_t *cparent,
1598                                   hammer2_cluster_t *cluster,
1599                                   hammer2_key_t *key_nextp,
1600                                   hammer2_key_t key_beg,
1601                                   hammer2_key_t key_end,
1602                                   int i, int flags)
1603 {
1604         hammer2_chain_t *ochain;
1605         hammer2_chain_t *nchain;
1606         hammer2_chain_t *focus;
1607         hammer2_key_t key_accum;
1608         hammer2_key_t key_next;
1609         int ddflag;
1610
1611         key_accum = *key_nextp;
1612         key_next = *key_nextp;
1613         ochain = cluster->array[i].chain;
1614         if (ochain == NULL)
1615                 goto done;
1616         KKASSERT(ochain != cluster->focus);
1617
1618         nchain = hammer2_chain_next(&cparent->array[i].chain, ochain,
1619                                     &key_next, key_beg, key_end,
1620                                     &cparent->array[i].cache_index,
1621                                     flags);
1622         /* ochain now invalid */
1623         if (cparent->focus_index == i)
1624                 cparent->focus = cparent->array[i].chain;
1625
1626         /*
1627          * Install nchain.  Note that nchain can be NULL, and can also
1628          * be in an unlocked state depending on flags.
1629          */
1630         cluster->array[i].chain = nchain;
1631         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
1632
1633         if (key_accum > key_next)
1634                 key_accum = key_next;
1635
1636         focus = cluster->focus;
1637         if (focus == NULL)
1638                 goto done;
1639         if (nchain == NULL)
1640                 goto done;
1641 #if 0
1642         if (nchain == focus)    /* ASSERTED NOT TRUE */
1643                 ...
1644 #endif
1645         ddflag = (nchain->bref.type == HAMMER2_BREF_TYPE_INODE);
1646         if (nchain->bref.type != focus->bref.type ||
1647             nchain->bref.key != focus->bref.key ||
1648             nchain->bref.keybits != focus->bref.keybits ||
1649             nchain->bref.modify_tid != focus->bref.modify_tid ||
1650             nchain->bytes != focus->bytes ||
1651             ddflag != cluster->ddflag) {
1652                 cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
1653         }
1654
1655 done:
1656         *key_nextp = key_accum;
1657 #if 0
1658         /*
1659          * For now don't re-resolve cluster->flags.
1660          */
1661         hammer2_cluster_resolve(cluster);
1662 #endif
1663 }
1664
1665 /*
1666  * Create a new cluster using the specified key
1667  */
1668 int
1669 hammer2_cluster_create(hammer2_pfs_t *pmp, hammer2_cluster_t *cparent,
1670                      hammer2_cluster_t **clusterp,
1671                      hammer2_key_t key, int keybits,
1672                      int type, size_t bytes, int flags)
1673 {
1674         hammer2_cluster_t *cluster;
1675         int error;
1676         int i;
1677
1678         if ((cluster = *clusterp) == NULL) {
1679                 cluster = kmalloc(sizeof(*cluster), M_HAMMER2,
1680                                   M_WAITOK | M_ZERO);
1681                 cluster->pmp = pmp;                     /* can be NULL */
1682                 cluster->refs = 1;
1683                 cluster->flags = HAMMER2_CLUSTER_LOCKED;
1684         }
1685         cluster->focus_index = 0;
1686         cluster->focus = NULL;
1687
1688         /*
1689          * NOTE: cluster->array[] entries can initially be NULL.  If
1690          *       *clusterp is supplied, skip NULL entries, otherwise
1691          *       create new chains.
1692          */
1693         for (i = 0; i < cparent->nchains; ++i) {
1694                 if ((cparent->array[i].flags & HAMMER2_CITEM_FEMOD) == 0) {
1695                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
1696                         continue;
1697                 }
1698                 if (*clusterp) {
1699                         if ((cluster->array[i].flags &
1700                              HAMMER2_CITEM_FEMOD) == 0) {
1701                                 cluster->array[i].flags |=
1702                                                 HAMMER2_CITEM_INVALID;
1703                                 continue;
1704                         }
1705                         if (cluster->array[i].chain == NULL)
1706                                 continue;
1707                 }
1708                 error = hammer2_chain_create(&cparent->array[i].chain,
1709                                              &cluster->array[i].chain, pmp,
1710                                              key, keybits,
1711                                              type, bytes, flags);
1712                 if (cparent->focus_index == i)
1713                         cparent->focus = cparent->array[i].chain;
1714                 KKASSERT(error == 0);
1715                 if (cluster->focus == NULL) {
1716                         cluster->focus_index = i;
1717                         cluster->focus = cluster->array[i].chain;
1718                 }
1719                 if (cparent->focus == cparent->array[i].chain) {
1720                         cluster->focus_index = i;
1721                         cluster->focus = cluster->array[i].chain;
1722                 }
1723         }
1724         cluster->nchains = i;
1725         *clusterp = cluster;
1726         hammer2_cluster_resolve(cluster);
1727
1728         return error;
1729 }
1730
1731 /*
1732  * Mark a cluster deleted
1733  */
1734 void
1735 hammer2_cluster_delete(hammer2_cluster_t *cparent,
1736                        hammer2_cluster_t *cluster, int flags)
1737 {
1738         hammer2_chain_t *chain;
1739         hammer2_chain_t *parent;
1740         int i;
1741
1742         if (cparent == NULL) {
1743                 kprintf("cparent is NULL\n");
1744                 return;
1745         }
1746
1747         for (i = 0; i < cluster->nchains; ++i) {
1748                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0) {
1749                         cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
1750                         continue;
1751                 }
1752                 parent = cparent->array[i].chain;
1753                 chain = cluster->array[i].chain;
1754                 if (chain == NULL)
1755                         continue;
1756                 if (chain->parent != parent) {
1757                         kprintf("hammer2_cluster_delete: parent "
1758                                 "mismatch chain=%p parent=%p against=%p\n",
1759                                 chain, chain->parent, parent);
1760                 } else {
1761                         hammer2_chain_delete(parent, chain, flags);
1762                 }
1763         }
1764 }
1765
1766 /*
1767  * Create a snapshot of the specified {parent, ochain} with the specified
1768  * label.  The originating hammer2_inode must be exclusively locked for
1769  * safety.
1770  *
1771  * The ioctl code has already synced the filesystem.
1772  */
1773 int
1774 hammer2_cluster_snapshot(hammer2_cluster_t *ocluster,
1775                        hammer2_ioc_pfs_t *pmp)
1776 {
1777         hammer2_dev_t *hmp;
1778         const hammer2_inode_data_t *ripdata;
1779         hammer2_inode_data_t *wipdata;
1780         hammer2_chain_t *nchain;
1781         hammer2_inode_t *nip;
1782         size_t name_len;
1783         hammer2_key_t lhc;
1784         struct vattr vat;
1785 #if 0
1786         uuid_t opfs_clid;
1787 #endif
1788         int error;
1789
1790         kprintf("snapshot %s\n", pmp->name);
1791
1792         name_len = strlen(pmp->name);
1793         lhc = hammer2_dirhash(pmp->name, name_len);
1794
1795         /*
1796          * Get the clid
1797          */
1798         ripdata = &hammer2_cluster_rdata(ocluster)->ipdata;
1799 #if 0
1800         opfs_clid = ripdata->meta.pfs_clid;
1801 #endif
1802         hmp = ocluster->focus->hmp;     /* XXX find synchronized local disk */
1803
1804         /*
1805          * Create the snapshot directory under the super-root
1806          *
1807          * Set PFS type, generate a unique filesystem id, and generate
1808          * a cluster id.  Use the same clid when snapshotting a PFS root,
1809          * which theoretically allows the snapshot to be used as part of
1810          * the same cluster (perhaps as a cache).
1811          *
1812          * Copy the (flushed) blockref array.  Theoretically we could use
1813          * chain_duplicate() but it becomes difficult to disentangle
1814          * the shared core so for now just brute-force it.
1815          */
1816         VATTR_NULL(&vat);
1817         vat.va_type = VDIR;
1818         vat.va_mode = 0755;
1819         nip = hammer2_inode_create(hmp->spmp->iroot, &vat, proc0.p_ucred,
1820                                    pmp->name, name_len,
1821                                    1, 0, 0,
1822                                    HAMMER2_INSERT_PFSROOT, &error);
1823
1824         if (nip) {
1825                 hammer2_inode_modify(nip);
1826                 nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
1827                 hammer2_chain_modify(nchain, 0);
1828                 wipdata = &nchain->data->ipdata;
1829
1830                 nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
1831                 nip->meta.pfs_subtype = HAMMER2_PFSSUBTYPE_SNAPSHOT;
1832                 nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
1833                 kern_uuidgen(&nip->meta.pfs_fsid, 1);
1834
1835                 /*
1836                  * Give the snapshot its own private cluster id.  As a
1837                  * snapshot no further synchronization with the original
1838                  * cluster will be done.
1839                  */
1840 #if 0
1841                 if (ocluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
1842                         nip->meta.pfs_clid = opfs_clid;
1843                 else
1844                         kern_uuidgen(&nip->meta.pfs_clid, 1);
1845 #endif
1846                 kern_uuidgen(&nip->meta.pfs_clid, 1);
1847                 nchain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT;
1848
1849                 /* XXX hack blockset copy */
1850                 /* XXX doesn't work with real cluster */
1851                 KKASSERT(ocluster->nchains == 1);
1852                 wipdata->meta = nip->meta;
1853                 wipdata->u.blockset = ripdata->u.blockset;
1854                 hammer2_flush(nchain, 1);
1855                 hammer2_chain_unlock(nchain);
1856                 hammer2_chain_drop(nchain);
1857                 hammer2_inode_unlock(nip, NULL);
1858         }
1859         return (error);
1860 }
1861
1862 /*
1863  * Return locked parent cluster given a locked child.  The child remains
1864  * locked on return.  The new parent's focus follows the child's focus
1865  * and the parent is always resolved.
1866  *
1867  * We must temporarily unlock the passed-in cluster to avoid a deadlock
1868  * between elements of the cluster.
1869  *
1870  * We must not try to hammer2_cluster_resolve() cparent.  The individual
1871  * parent chains for the nodes are the correct parents for the cluster but
1872  * do not necessarily match, so resolve would likely implode.
1873  */
1874 hammer2_cluster_t *
1875 hammer2_cluster_parent(hammer2_cluster_t *cluster)
1876 {
1877         hammer2_cluster_t *cparent;
1878         int i;
1879
1880         cparent = hammer2_cluster_copy(cluster);
1881         hammer2_cluster_unlock(cluster);
1882
1883         for (i = 0; i < cparent->nchains; ++i) {
1884                 hammer2_chain_t *chain;
1885                 hammer2_chain_t *rchain;
1886
1887                 /*
1888                  * Calculate parent for each element.  Old chain has an extra
1889                  * ref for cparent but the lock remains with cluster.
1890                  */
1891                 chain = cparent->array[i].chain;
1892                 if (chain == NULL)
1893                         continue;
1894                 while ((rchain = chain->parent) != NULL) {
1895                         hammer2_chain_ref(rchain);
1896                         hammer2_chain_lock(rchain, HAMMER2_RESOLVE_ALWAYS);
1897                         if (chain->parent == rchain)
1898                                 break;
1899                         hammer2_chain_unlock(rchain);
1900                         hammer2_chain_drop(rchain);
1901                 }
1902                 cparent->array[i].chain = rchain;
1903                 hammer2_chain_drop(chain);
1904         }
1905         cparent->flags |= HAMMER2_CLUSTER_LOCKED;
1906         /* hammer2_cluster_resolve(cparent); */
1907         hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS);
1908
1909         return cparent;
1910 }
1911
1912 /************************************************************************
1913  *                              CLUSTER I/O                             *
1914  ************************************************************************
1915  *
1916  *
1917  * WARNING! blockref[] array data is not universal.  These functions should
1918  *          only be used to access universal data.
1919  *
1920  * NOTE!    The rdata call will wait for at least one of the chain I/Os to
1921  *          complete if necessary.  The I/O's should have already been
1922  *          initiated by the cluster_lock/chain_lock operation.
1923  *
1924  *          The cluster must already be in a modified state before wdata
1925  *          is called.  The data will already be available for this case.
1926  */
1927 const hammer2_media_data_t *
1928 hammer2_cluster_rdata(hammer2_cluster_t *cluster)
1929 {
1930         KKASSERT(cluster->focus != NULL);
1931         return(cluster->focus->data);
1932 }
1933
1934 const hammer2_media_data_t *
1935 hammer2_cluster_rdata_bytes(hammer2_cluster_t *cluster, size_t *bytesp)
1936 {
1937         KKASSERT(cluster->focus != NULL);
1938         *bytesp = cluster->focus->bytes;
1939         return(cluster->focus->data);
1940 }
1941
1942 hammer2_media_data_t *
1943 hammer2_cluster_wdata(hammer2_cluster_t *cluster)
1944 {
1945         KKASSERT(cluster->focus != NULL);
1946         KKASSERT(hammer2_cluster_modified(cluster));
1947         return(cluster->focus->data);
1948 }