/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The cluster module collects multiple chains representing the same
 * information from different nodes into a single entity.  It allows direct
 * access to media data as long as it is not blockref array data (which
 * will obviously have to be different at each node).
 *
 * This module also handles I/O dispatch, status rollup, and various
 * mastership arrangements including quorum operations.  It effectively
 * presents one topology to the vnops layer.
 *
 * Many of the API calls mimic chain API calls but operate on clusters
 * instead of chains.  Please see hammer2_chain.c for more complete code
 * documentation of the API functions.
 *
 * WARNING! This module is *extremely* complex.  It must issue asynchronous
 *          locks and I/O, do quorum and/or master-slave processing, and
 *          it must operate properly even if some nodes are broken (which
 *          can also mean indefinite locks).
 *
 *                              CLUSTER OPERATIONS
 *
 * Cluster operations can be broken down into three pieces:
 *
 * (1) Chain locking and data retrieval.
 *
 *      - Most complex functions, quorum management on transaction ids.
 *
 *      - Locking and data accesses must be internally asynchronous.
 *
 *      - Validate and manage cache coherency primitives (cache state
 *        is stored in chain topologies but must be validated by these
 *        functions).
 *
 * (2) Lookups and Scans
 *              hammer2_cluster_lookup()
 *              hammer2_cluster_next()
 *
 *      - Depend on locking & data retrieval functions, but still complex.
 *
 *      - Must do quorum management on transaction ids.
 *
 *      - Lookup and iteration ops must be internally asynchronous.
 *
 * (3) Modifying Operations
 *              hammer2_cluster_create()
 *
 *      - Can usually punt on failures; the operation continues unless
 *        quorum is lost.  If quorum is lost, must wait for
 *        resynchronization (depending on the management mode).
 *
 *      - Must disconnect a node on failure (and must not flush it), then
 *        remount and resynchronize.
 *
 *      - Network links (via kdmsg) are relatively easy to issue as the
 *        complex underworkings of hammer2_chain.c don't have to be messed
 *        with (the protocol is at a higher level than block-level).
 *
 *      - Multiple local disk nodes (i.e. block devices) are another matter.
 *        Chain operations have to be dispatched to per-node threads (xN)
 *        because we can't asynchronize potentially very complex chain
 *        operations in hammer2_chain.c (it would be a huge mess).
 *
 *        (these threads are also used to terminate incoming kdmsg ops from
 *        other machines).
 *
 *      - Single-node filesystems do not use threads and will simply call
 *        hammer2_chain.c functions directly.  This short-cut is handled
 *        at the base of each cluster function.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>

#include "hammer2.h"

/*
 * Returns the bref type of the cluster's focus.
 *
 * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
 * The cluster must be locked.
 */
uint8_t
hammer2_cluster_type(hammer2_cluster_t *cluster)
{
        if (cluster->error == 0) {
                KKASSERT(cluster->focus != NULL);
                return(cluster->focus->bref.type);
        }
        return 0;
}

/*
 * Returns the bref of the cluster's focus, sans any data-offset information
 * (since offset information is per-node and wouldn't be useful).
 *
 * Callers use this function to access modify_tid, mirror_tid, type,
 * key, and keybits.
 *
 * If the cluster is errored, returns an empty bref.
 * The cluster must be locked.
 */
void
hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
{
        if (cluster->error == 0) {
                KKASSERT(cluster->focus != NULL);
                *bref = cluster->focus->bref;
                bref->data_off = 0;
        } else {
                bzero(bref, sizeof(*bref));
        }
}

/*
 * Create a degenerate cluster with one ref from a single locked chain.
 * The returned cluster will be focused on the chain and inherit its
 * error state.
 *
 * The chain's lock and reference are transferred to the new cluster, so
 * the caller should not try to unlock the chain separately.
 *
 * We fake the flags.
 */
void
hammer2_dummy_xop_from_chain(hammer2_xop_head_t *xop, hammer2_chain_t *chain)
{
        hammer2_cluster_t *cluster;

        bzero(xop, sizeof(*xop));

        cluster = &xop->cluster;
        cluster->array[0].chain = chain;
        cluster->array[0].flags = HAMMER2_CITEM_FEMOD;
        cluster->nchains = 1;
        cluster->focus = chain;
        cluster->focus_index = 0;
        cluster->pmp = chain->pmp;
        cluster->refs = 1;
        cluster->error = chain->error;
        cluster->flags = HAMMER2_CLUSTER_LOCKED |
                         HAMMER2_CLUSTER_WRHARD |
                         HAMMER2_CLUSTER_RDHARD |
                         HAMMER2_CLUSTER_MSYNCED |
                         HAMMER2_CLUSTER_SSYNCED;
}
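
/*
 * Illustrative sketch (editor's example, not part of the original source):
 * a caller holding a locked, referenced chain can wrap it in a degenerate
 * one-chain cluster and use the cluster accessors on it.  The chain's lock
 * and ref are transferred to the cluster as described above:
 *
 *      hammer2_xop_head_t xop;
 *      hammer2_blockref_t bref;
 *
 *      hammer2_dummy_xop_from_chain(&xop, chain);
 *      if (hammer2_cluster_type(&xop.cluster) == HAMMER2_BREF_TYPE_INODE)
 *              hammer2_cluster_bref(&xop.cluster, &bref);
 */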

/*
 * Add a reference to a cluster.  The underlying chains already hold their
 * own references (dropped by hammer2_cluster_drop()), which is what allows
 * ref/unlock sequences to later re-lock.
 */
void
hammer2_cluster_ref(hammer2_cluster_t *cluster)
{
        atomic_add_int(&cluster->refs, 1);
}

/*
 * Drop the caller's reference to the cluster.  When the ref count drops to
 * zero this function frees the cluster and drops all underlying chains.
 *
 * In-progress read I/Os are typically detached from the cluster once the
 * first one returns (the remaining stay attached to the DIOs but are then
 * ignored and drop naturally).
 */
void
hammer2_cluster_drop(hammer2_cluster_t *cluster)
{
        hammer2_chain_t *chain;
        int i;

        KKASSERT(cluster->refs > 0);
        if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
                cluster->focus = NULL;          /* safety XXX chg to assert */
                cluster->focus_index = 0;

                for (i = 0; i < cluster->nchains; ++i) {
                        chain = cluster->array[i].chain;
                        if (chain) {
                                hammer2_chain_drop(chain);
                                cluster->array[i].chain = NULL; /* safety */
                        }
                }
                cluster->nchains = 0;                           /* safety */

                kfree(cluster, M_HAMMER2);
                /* cluster is invalid */
        }
}
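
/*
 * Illustrative pairing (editor's sketch): a caller that wants to keep a
 * cluster alive across an unlock takes its own ref first and must balance
 * it with a drop later:
 *
 *      hammer2_cluster_ref(cluster);
 *      hammer2_cluster_unlock(cluster);
 *      ...
 *      hammer2_cluster_drop(cluster);
 */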

/*
 * Lock a cluster.  Cluster must already be referenced.  Focus is maintained.
 *
 * WARNING! This function expects the caller to handle resolution of the
 *          cluster.  We never re-resolve the cluster in this function,
 *          because it might be used to temporarily unlock/relock a cparent
 *          in an iteration or recursion, and the cparent's elements do not
 *          necessarily match.
 */
void
hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
{
        hammer2_chain_t *chain;
        int i;

        /* cannot be on inode-embedded cluster template, must be on copy */
        KKASSERT(cluster->refs > 0);
        KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0);
        if (cluster->flags & HAMMER2_CLUSTER_LOCKED) {
                panic("hammer2_cluster_lock: cluster %p already locked!\n",
                        cluster);
        }
        atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);

        /*
         * Lock chains and resolve state.
         */
        for (i = 0; i < cluster->nchains; ++i) {
                chain = cluster->array[i].chain;
                if (chain == NULL)
                        continue;
                hammer2_chain_lock(chain, how);
        }
}

/*
 * Unhold each of the cluster's chains via hammer2_chain_unhold().
 */
void
hammer2_cluster_unhold(hammer2_cluster_t *cluster)
{
        hammer2_chain_t *chain;
        int i;

        for (i = 0; i < cluster->nchains; ++i) {
                chain = cluster->array[i].chain;
                if (chain == NULL)
                        continue;
                hammer2_chain_unhold(chain);
        }
}

/*
 * Re-hold each of the cluster's chains via hammer2_chain_rehold().
 */
void
hammer2_cluster_rehold(hammer2_cluster_t *cluster)
{
        hammer2_chain_t *chain;
        int i;

        for (i = 0; i < cluster->nchains; ++i) {
                chain = cluster->array[i].chain;
                if (chain == NULL)
                        continue;
                hammer2_chain_rehold(chain);
        }
}
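
/*
 * Typical bracketing (editor's sketch): unhold/rehold are symmetric, so a
 * caller that must block without holding the chains can do:
 *
 *      hammer2_cluster_unhold(cluster);
 *      ...potentially blocking operation...
 *      hammer2_cluster_rehold(cluster);
 */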

/*
 * This is used by the XOPS subsystem to calculate the state of
 * the collection and tell hammer2_xop_collect() what to do with it.
 * The collection can be in various states of desynchronization; the
 * caller specifically wants to resolve the passed-in key.
 *
 * Return values (HAMMER2_ERROR_*):
 *
 *      0               - Quorum agreement, key is valid
 *
 *      ENOENT          - Quorum agreement, end of scan
 *
 *      ESRCH           - Quorum agreement, key is INVALID (caller should
 *                        skip key).
 *
 *      EIO             - Quorum agreement but all elements had errors.
 *
 *      EDEADLK         - No quorum agreement possible for key, a repair
 *                        may be needed.  Caller has to decide what to do,
 *                        possibly iterating the key or generating an EIO.
 *
 *      EINPROGRESS     - No quorum agreement yet, but agreement is still
 *                        possible if caller waits for more responses.  Caller
 *                        should not iterate key.
 *
 *      CHECK           - CRC check error
 *
 * NOTE! If the pmp is in HMNT2_LOCAL mode, the cluster check always succeeds.
 *
 * XXX needs to handle SOFT_MASTER and SOFT_SLAVE
 */
int
hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
{
        hammer2_chain_t *chain;
        hammer2_chain_t *focus;
        hammer2_pfs_t *pmp;
        hammer2_tid_t quorum_tid;
        hammer2_tid_t last_best_quorum_tid;
        uint32_t nflags;
        int ttlmasters;
        int ttlslaves;
        int nmasters;
        int nmasters_keymatch;
        int nslaves;
        int nquorum;
        int umasters;   /* unknown masters (still in progress) */
        int error;
        int i;

        cluster->error = 0;
        cluster->focus = NULL;

        pmp = cluster->pmp;
        KKASSERT(pmp != NULL || cluster->nchains == 0);

        /*
         * Calculate quorum
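         *
         * A strict majority of the configured masters is required (editor's
         * note): pfs_nmasters of 1, 3, and 5 yield quorums of 1, 2, and 3
         * respectively.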
         */
        nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
        nflags = 0;
        ttlmasters = 0;
        ttlslaves = 0;

        /*
         * Pass 1
         *
         * NOTE: A NULL chain is not necessarily an error, it could be
         *       e.g. a lookup failure or the end of an iteration.
         *       Process normally.
         */
        for (i = 0; i < cluster->nchains; ++i) {
                cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
                cluster->array[i].flags |= HAMMER2_CITEM_INVALID;

                chain = cluster->array[i].chain;
                error = cluster->array[i].error;
                if (chain && error) {
                        if (cluster->focus == NULL || cluster->focus == chain) {
                                /* error will be overridden by valid focus */
                                /* XXX */
                        }

                        /*
                         * Must count total masters and slaves whether the
                         * chain is errored or not.
                         */
                        switch (cluster->pmp->pfs_types[i]) {
                        case HAMMER2_PFSTYPE_SUPROOT:
                        case HAMMER2_PFSTYPE_MASTER:
                                ++ttlmasters;
                                break;
                        case HAMMER2_PFSTYPE_SLAVE:
                                ++ttlslaves;
                                break;
                        }
                        continue;
                }
                switch (cluster->pmp->pfs_types[i]) {
                case HAMMER2_PFSTYPE_MASTER:
                        ++ttlmasters;
                        break;
                case HAMMER2_PFSTYPE_SLAVE:
                        ++ttlslaves;
                        break;
                case HAMMER2_PFSTYPE_SOFT_MASTER:
                        nflags |= HAMMER2_CLUSTER_WRSOFT;
                        nflags |= HAMMER2_CLUSTER_RDSOFT;
                        break;
                case HAMMER2_PFSTYPE_SOFT_SLAVE:
                        nflags |= HAMMER2_CLUSTER_RDSOFT;
                        break;
                case HAMMER2_PFSTYPE_SUPROOT:
                        /*
                         * Degenerate cluster representing the super-root
                         * topology on a single device.  Fake stuff so
                         * cluster ops work as expected.
                         */
                        ++ttlmasters;
                        nflags |= HAMMER2_CLUSTER_WRHARD;
                        nflags |= HAMMER2_CLUSTER_RDHARD;
                        cluster->focus_index = i;
                        cluster->focus = chain;
                        cluster->error = error;
                        break;
                default:
                        break;
                }
        }

        /*
         * Pass 2
         *
         * Resolve nmasters             - master nodes fully match
         *
         * Resolve umasters             - master nodes operation still
         *                                in progress
         *
         * Resolve nmasters_keymatch    - master nodes match the passed-in
         *                                key and may or may not match
         *                                the quorum-agreed tid.
         *
         * The quorum-agreed TID is the highest matching TID.
         */
        last_best_quorum_tid = HAMMER2_TID_MAX;
        umasters = 0;
        nmasters = 0;
        nmasters_keymatch = 0;
        quorum_tid = 0;         /* fix gcc warning */

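        /*
         * Worked example (editor's note): with three masters reporting
         * modify_tids 50, 50, 40 and nquorum == 2, the first iteration
         * settles on quorum_tid 50 with nmasters == 2 and breaks out.
         * With tids 50, 40, 30 no agreement is possible; each iteration
         * lowers last_best_quorum_tid (50, 40, 30, then 0) without ever
         * reaching quorum, and the code falls through to the EDEADLK /
         * ESRCH handling below.
         */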
        while (nmasters < nquorum && last_best_quorum_tid != 0) {
                umasters = 0;
                nmasters = 0;
                nmasters_keymatch = 0;
                quorum_tid = 0;

                for (i = 0; i < cluster->nchains; ++i) {
                        /* XXX SOFT smpresent handling */
                        switch(cluster->pmp->pfs_types[i]) {
                        case HAMMER2_PFSTYPE_MASTER:
                        case HAMMER2_PFSTYPE_SUPROOT:
                                break;
                        default:
                                continue;
                        }

                        chain = cluster->array[i].chain;
                        error = cluster->array[i].error;

                        /*
                         * Skip elements still in progress.  umasters keeps
                         * track of masters that might still be in-progress.
                         */
                        if (chain == NULL && (cluster->array[i].flags &
                                              HAMMER2_CITEM_NULL) == 0) {
                                ++umasters;
                                continue;
                        }

                        /*
                         * Key match?
                         */
                        if (flags & HAMMER2_CHECK_NULL) {
                                if (chain == NULL) {
                                        ++nmasters;
                                        ++nmasters_keymatch;
                                        if (cluster->error == 0)
                                                cluster->error = error;
                                }
                        } else if (chain &&
                                   (key == (hammer2_key_t)-1 ||
                                    chain->bref.key == key)) {
                                ++nmasters_keymatch;

                                if (chain->bref.modify_tid <
                                     last_best_quorum_tid &&
                                    quorum_tid < chain->bref.modify_tid) {
                                        /*
                                         * Select new TID as master if better
                                         * than any found so far in this loop,
                                         * as long as it does not reach the
                                         * best tid found in the previous loop.
                                         */
                                        nmasters = 0;
                                        quorum_tid = chain->bref.modify_tid;
                                }
                                if (quorum_tid == chain->bref.modify_tid) {
                                        /*
                                         * TID matches current collection.
                                         *
                                         * (error handled in next pass)
                                         */
                                        ++nmasters;
                                        if (chain->error == 0) {
                                                cluster->focus = chain;
                                                cluster->focus_index = i;
                                        }
                                }
                        }
                }
                if (nmasters >= nquorum)
                        break;
                last_best_quorum_tid = quorum_tid;
        }

        /*
        kprintf("nmasters %d/%d nmaster_keymatch=%d umasters=%d\n",
                nmasters, nquorum, nmasters_keymatch, umasters);
        */

        /*
         * Early return if we do not have enough masters.
         */
        if (nmasters < nquorum) {
                if (nmasters + umasters >= nquorum)
                        return HAMMER2_ERROR_EINPROGRESS;
                if (nmasters_keymatch < nquorum)
                        return HAMMER2_ERROR_ESRCH;
                return HAMMER2_ERROR_EDEADLK;
        }

        /*
         * Validated end of scan.
         */
        if (flags & HAMMER2_CHECK_NULL) {
                if (cluster->error == 0)
                        cluster->error = HAMMER2_ERROR_ENOENT;
                return cluster->error;
        }

        /*
         * If we have a NULL focus at this point the agreeing quorum all
         * had chain errors.
         */
        if (cluster->focus == NULL)
                return HAMMER2_ERROR_EIO;

        /*
         * Pass 3
         *
         * We have quorum agreement (and this is not the end of the scan);
         * validate the elements.
         */
        nslaves = 0;
        cluster->error = 0;

        for (i = 0; i < cluster->nchains; ++i) {
                chain = cluster->array[i].chain;
                error = cluster->array[i].error;
                if (chain == NULL ||
                    chain->bref.key != key ||
                    chain->bref.modify_tid != quorum_tid) {
                        continue;
                }

                /*
                 * Quorum Match
                 *
                 * XXX for now, cumulative error.
                 */
                if (cluster->error == 0)
                        cluster->error = error;

                switch (cluster->pmp->pfs_types[i]) {
                case HAMMER2_PFSTYPE_MASTER:
                        cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
                        cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
                        nflags |= HAMMER2_CLUSTER_WRHARD;
                        nflags |= HAMMER2_CLUSTER_RDHARD;
                        break;
                case HAMMER2_PFSTYPE_SLAVE:
                        /*
                         * We must have enough up-to-date masters to reach
                         * a quorum and the slave modify_tid must match the
                         * quorum's modify_tid.
                         *
                         * Do not select an errored slave.
                         */
                        cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
                        nflags |= HAMMER2_CLUSTER_RDHARD;
                        ++nslaves;
                        break;
                case HAMMER2_PFSTYPE_SOFT_MASTER:
                        /*
                         * Directly mounted soft master always wins.  There
                         * should be only one.
                         */
                        cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
                        cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
                        break;
                case HAMMER2_PFSTYPE_SOFT_SLAVE:
                        /*
                         * Directly mounted soft slave always wins.  There
                         * should be only one.
                         *
                         * XXX
                         */
                        cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
                        break;
                case HAMMER2_PFSTYPE_SUPROOT:
                        /*
                         * spmp (degenerate case)
                         */
                        cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
                        cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
                        nflags |= HAMMER2_CLUSTER_WRHARD;
                        nflags |= HAMMER2_CLUSTER_RDHARD;
                        break;
                default:
                        break;
                }
        }

        /*
         * Focus now set, adjust ddflag.  Skip this pass if the focus
         * is bad or if we are at the PFS root (the bref won't match at
         * the PFS root, obviously).
         *
         * focus is probably not locked and it isn't safe to test its
         * content (e.g. focus->data, focus->dio, other content).  We
         * do not synchronize the dio to the cpu here.  In fact, in numerous
         * situations the frontend doesn't even need to access its dio/data,
         * so synchronizing it here would be wasteful.
         */
        focus = cluster->focus;
        if (focus) {
                cluster->ddflag =
                        (cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
        } else {
                cluster->ddflag = 0;
                goto skip4;
        }
        if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
                goto skip4;

        /*
         * Pass 4
         *
         * Validate the elements that were not marked invalid.  They should
         * match.
         */
        for (i = 0; i < cluster->nchains; ++i) {
                int ddflag;

                chain = cluster->array[i].chain;

                if (chain == NULL)
                        continue;
                if (chain == focus)
                        continue;
                if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
                        continue;

                ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
                if (chain->bref.type != focus->bref.type ||
                    chain->bref.key != focus->bref.key ||
                    chain->bref.keybits != focus->bref.keybits ||
                    chain->bref.modify_tid != focus->bref.modify_tid ||
                    chain->bytes != focus->bytes ||
                    ddflag != cluster->ddflag) {
                        cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
                        if (hammer2_debug & 1)
                        kprintf("cluster_check: matching modify_tid failed "
                                "bref test: idx=%d type=%02x/%02x "
                                "key=%016jx/%d-%016jx/%d "
                                "mod=%016jx/%016jx bytes=%u/%u\n",
                                i,
                                chain->bref.type, focus->bref.type,
                                chain->bref.key, chain->bref.keybits,
                                focus->bref.key, focus->bref.keybits,
                                chain->bref.modify_tid, focus->bref.modify_tid,
                                chain->bytes, focus->bytes);
                        if (hammer2_debug & 0x4000)
                                panic("cluster_check");
                        /* flag issue and force resync? */
                }
        }
skip4:

        if (ttlslaves == 0)
                nflags |= HAMMER2_CLUSTER_NOSOFT;
        if (ttlmasters == 0)
                nflags |= HAMMER2_CLUSTER_NOHARD;

        /*
         * Set SSYNCED or MSYNCED for slaves and masters respectively if
         * all available nodes (even if 0 are available) are fully
         * synchronized.  This is used by the synchronization thread to
         * determine if there is work it could potentially accomplish.
         */
        if (nslaves == ttlslaves)
                nflags |= HAMMER2_CLUSTER_SSYNCED;
        if (nmasters == ttlmasters)
                nflags |= HAMMER2_CLUSTER_MSYNCED;

        /*
         * Determine if the cluster was successfully locked for the
         * requested operation and generate an error code.  The cluster
         * will not be locked (or ref'd) if an error is returned.
         */
        atomic_set_int(&cluster->flags, nflags);
        atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);

        return cluster->error;
}
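
/*
 * Illustrative dispatch (editor's sketch) over the return codes documented
 * above, roughly the decisions a collector such as hammer2_xop_collect()
 * has to make:
 *
 *      error = hammer2_cluster_check(cluster, key, 0);
 *      switch (error) {
 *      case 0:                              use cluster->focus
 *      case HAMMER2_ERROR_ESRCH:            skip this key, iterate
 *      case HAMMER2_ERROR_EINPROGRESS:      wait for more node responses
 *      case HAMMER2_ERROR_EDEADLK:          attempt repair or return EIO
 *      default:                             propagate the error
 *      }
 */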

/*
 * Unlock a cluster.  Refcount and focus are maintained.
 */
void
hammer2_cluster_unlock(hammer2_cluster_t *cluster)
{
        hammer2_chain_t *chain;
        int i;

        if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) {
                kprintf("hammer2_cluster_unlock: cluster %p not locked\n",
                        cluster);
        }
        KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
        KKASSERT(cluster->refs > 0);
        atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);

        for (i = 0; i < cluster->nchains; ++i) {
                chain = cluster->array[i].chain;
                if (chain)
                        hammer2_chain_unlock(chain);
        }
}
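
/*
 * Illustrative lock cycle (editor's sketch): a referenced cluster is
 * locked, checked for quorum agreement at a key, and unlocked.  The
 * HAMMER2_RESOLVE_* modes are the same ones accepted by
 * hammer2_chain_lock():
 *
 *      hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS);
 *      error = hammer2_cluster_check(cluster, key, 0);
 *      ...
 *      hammer2_cluster_unlock(cluster);
 */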