655142e3adadde9544594a6ee4c1ac7d9bcf1ecb
[dragonfly.git] / sys / vfs / hammer2 / hammer2_syncthr.c
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * This module implements various PFS-based helper threads.
36  */
37 #include "hammer2.h"
38
39 #define HAMMER2_SYNCTHR_DEBUG 1
40
41 static int hammer2_sync_slaves(hammer2_syncthr_t *thr,
42                         hammer2_cluster_t *cparent, int *errors);
43 static void hammer2_update_pfs_status(hammer2_syncthr_t *thr,
44                         hammer2_cluster_t *cparent);
45 static int hammer2_sync_insert(hammer2_syncthr_t *thr,
46                         hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
47                         hammer2_tid_t modify_tid,
48                         int i, int *errors);
49 static int hammer2_sync_destroy(hammer2_syncthr_t *thr,
50                         hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
51                         int i, int *errors);
52 static int hammer2_sync_replace(hammer2_syncthr_t *thr,
53                         hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
54                         hammer2_tid_t modify_tid,
55                         int i, int *errors);
56
57 /*
58  * Initialize the suspplied syncthr structure, starting the specified
59  * thread.
60  */
61 void
62 hammer2_syncthr_create(hammer2_syncthr_t *thr, hammer2_pfs_t *pmp,
63                        int clindex, void (*func)(void *arg))
64 {
65         lockinit(&thr->lk, "h2syncthr", 0, 0);
66         thr->pmp = pmp;
67         thr->clindex = clindex;
68         lwkt_create(func, thr, &thr->td, NULL, 0, -1,
69                     "h2nod-%s", pmp->pfs_names[clindex]);
70 }
71
72 /*
73  * Terminate a syncthr.  This function will silently return if the syncthr
74  * was never initialized or has already been deleted.
75  *
76  * This is accomplished by setting the STOP flag and waiting for the td
77  * structure to become NULL.
78  */
79 void
80 hammer2_syncthr_delete(hammer2_syncthr_t *thr)
81 {
82         if (thr->td == NULL)
83                 return;
84         lockmgr(&thr->lk, LK_EXCLUSIVE);
85         atomic_set_int(&thr->flags, HAMMER2_SYNCTHR_STOP);
86         wakeup(&thr->flags);
87         while (thr->td) {
88                 lksleep(thr, &thr->lk, 0, "h2thr", hz);
89         }
90         lockmgr(&thr->lk, LK_RELEASE);
91         thr->pmp = NULL;
92         lockuninit(&thr->lk);
93 }
94
95 /*
96  * Asynchronous remaster request.  Ask the synchronization thread to
97  * start over soon (as if it were frozen and unfrozen, but without waiting).
98  * The thread always recalculates mastership relationships when restarting.
99  */
100 void
101 hammer2_syncthr_remaster(hammer2_syncthr_t *thr)
102 {
103         if (thr->td == NULL)
104                 return;
105         lockmgr(&thr->lk, LK_EXCLUSIVE);
106         atomic_set_int(&thr->flags, HAMMER2_SYNCTHR_REMASTER);
107         wakeup(&thr->flags);
108         lockmgr(&thr->lk, LK_RELEASE);
109 }
110
111 void
112 hammer2_syncthr_freeze(hammer2_syncthr_t *thr)
113 {
114         if (thr->td == NULL)
115                 return;
116         lockmgr(&thr->lk, LK_EXCLUSIVE);
117         atomic_set_int(&thr->flags, HAMMER2_SYNCTHR_FREEZE);
118         wakeup(&thr->flags);
119         while ((thr->flags & HAMMER2_SYNCTHR_FROZEN) == 0) {
120                 lksleep(thr, &thr->lk, 0, "h2frz", hz);
121         }
122         lockmgr(&thr->lk, LK_RELEASE);
123 }
124
125 void
126 hammer2_syncthr_unfreeze(hammer2_syncthr_t *thr)
127 {
128         if (thr->td == NULL)
129                 return;
130         lockmgr(&thr->lk, LK_EXCLUSIVE);
131         atomic_clear_int(&thr->flags, HAMMER2_SYNCTHR_FROZEN);
132         wakeup(&thr->flags);
133         lockmgr(&thr->lk, LK_RELEASE);
134 }
135
136 /*
137  * Primary management thread for an element of a node.  A thread will exist
138  * for each element requiring management.
139  *
140  * No management threads are needed for the SPMP or for any PMP with only
141  * a single MASTER.
142  *
143  * On the SPMP - handles bulkfree and dedup operations
144  * On a PFS    - handles remastering and synchronization
145  */
146 void
147 hammer2_syncthr_primary(void *arg)
148 {
149         hammer2_syncthr_t *thr = arg;
150         hammer2_cluster_t *cparent;
151         hammer2_chain_t *chain;
152         hammer2_pfs_t *pmp;
153         int errors[HAMMER2_MAXCLUSTER];
154         int error;
155
156         pmp = thr->pmp;
157
158         lockmgr(&thr->lk, LK_EXCLUSIVE);
159         while ((thr->flags & HAMMER2_SYNCTHR_STOP) == 0) {
160                 /*
161                  * Handle freeze request
162                  */
163                 if (thr->flags & HAMMER2_SYNCTHR_FREEZE) {
164                         atomic_set_int(&thr->flags, HAMMER2_SYNCTHR_FROZEN);
165                         atomic_clear_int(&thr->flags, HAMMER2_SYNCTHR_FREEZE);
166                 }
167
168                 /*
169                  * Force idle if frozen until unfrozen or stopped.
170                  */
171                 if (thr->flags & HAMMER2_SYNCTHR_FROZEN) {
172                         lksleep(&thr->flags, &thr->lk, 0, "frozen", 0);
173                         continue;
174                 }
175
176                 /*
177                  * Reset state on REMASTER request
178                  */
179                 if (thr->flags & HAMMER2_SYNCTHR_REMASTER) {
180                         atomic_clear_int(&thr->flags, HAMMER2_SYNCTHR_REMASTER);
181                         /* reset state */
182                 }
183
184                 /*
185                  * Synchronization scan.
186                  */
187                 hammer2_trans_init(&thr->trans, pmp, HAMMER2_TRANS_KEEPMODIFY);
188                 cparent = hammer2_inode_lock(pmp->iroot,
189                                              HAMMER2_RESOLVE_ALWAYS);
190                 hammer2_update_pfs_status(thr, cparent);
191                 hammer2_inode_unlock(pmp->iroot, NULL);
192                 bzero(errors, sizeof(errors));
193                 kprintf("sync_slaves clindex %d\n", thr->clindex);
194
195                 /*
196                  * We are the syncer, not a normal frontend operator,
197                  * so force cparent good to prime the scan.
198                  */
199                 hammer2_cluster_forcegood(cparent);
200                 error = hammer2_sync_slaves(thr, cparent, errors);
201                 if (error)
202                         kprintf("hammer2_sync_slaves: error %d\n", error);
203                 chain = cparent->array[thr->clindex].chain;
204
205                 /*
206                  * Retain chain for our node and release the cluster.
207                  */
208                 hammer2_chain_ref(chain);
209                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
210                 hammer2_cluster_unlock(cparent);
211                 hammer2_cluster_drop(cparent);
212
213                 /*
214                  * Flush the chain.
215                  */
216                 hammer2_flush(&thr->trans, chain, 1);
217                 hammer2_chain_unlock(chain);
218                 hammer2_chain_drop(chain);
219
220                 hammer2_trans_done(&thr->trans);
221
222                 /*
223                  * Wait for event, or 5-second poll.
224                  */
225                 lksleep(&thr->flags, &thr->lk, 0, "h2idle", hz * 5);
226         }
227         thr->td = NULL;
228         wakeup(thr);
229         lockmgr(&thr->lk, LK_RELEASE);
230         /* thr structure can go invalid after this point */
231 }
232
233 /*
234  * Given a locked cluster created from pmp->iroot, update the PFS's
235  * reporting status.
236  */
237 static
238 void
239 hammer2_update_pfs_status(hammer2_syncthr_t *thr, hammer2_cluster_t *cparent)
240 {
241         hammer2_pfs_t *pmp = thr->pmp;
242         uint32_t flags;
243
244         flags = cparent->flags & HAMMER2_CLUSTER_ZFLAGS;
245         if (pmp->flags == flags)
246                 return;
247         pmp->flags = flags;
248
249         kprintf("pfs %p", pmp);
250         if (flags & HAMMER2_CLUSTER_MSYNCED)
251                 kprintf(" masters-all-good");
252         if (flags & HAMMER2_CLUSTER_SSYNCED)
253                 kprintf(" slaves-all-good");
254
255         if (flags & HAMMER2_CLUSTER_WRHARD)
256                 kprintf(" quorum/rw");
257         else if (flags & HAMMER2_CLUSTER_RDHARD)
258                 kprintf(" quorum/ro");
259
260         if (flags & HAMMER2_CLUSTER_UNHARD)
261                 kprintf(" out-of-sync-masters");
262         else if (flags & HAMMER2_CLUSTER_NOHARD)
263                 kprintf(" no-masters-visible");
264
265         if (flags & HAMMER2_CLUSTER_WRSOFT)
266                 kprintf(" soft/rw");
267         else if (flags & HAMMER2_CLUSTER_RDSOFT)
268                 kprintf(" soft/ro");
269
270         if (flags & HAMMER2_CLUSTER_UNSOFT)
271                 kprintf(" out-of-sync-slaves");
272         else if (flags & HAMMER2_CLUSTER_NOSOFT)
273                 kprintf(" no-slaves-visible");
274         kprintf("\n");
275 }
276
277 static
278 void
279 dumpcluster(const char *label,
280             hammer2_cluster_t *cparent, hammer2_cluster_t *cluster)
281 {
282         hammer2_chain_t *chain;
283         int i;
284
285         if ((hammer2_debug & 1) == 0)
286                 return;
287
288         kprintf("%s\t", label);
289         KKASSERT(cparent->nchains == cluster->nchains);
290         for (i = 0; i < cparent->nchains; ++i) {
291                 if (i)
292                         kprintf("\t");
293                 kprintf("%d ", i);
294                 if ((chain = cparent->array[i].chain) != NULL) {
295                         kprintf("%016jx%s ",
296                                 chain->bref.key,
297                                 ((cparent->array[i].flags &
298                                   HAMMER2_CITEM_INVALID) ? "(I)" : "   ")
299                         );
300                 } else {
301                         kprintf("      NULL      %s ", "   ");
302                 }
303                 if ((chain = cluster->array[i].chain) != NULL) {
304                         kprintf("%016jx%s ",
305                                 chain->bref.key,
306                                 ((cluster->array[i].flags &
307                                   HAMMER2_CITEM_INVALID) ? "(I)" : "   ")
308                         );
309                 } else {
310                         kprintf("      NULL      %s ", "   ");
311                 }
312                 kprintf("\n");
313         }
314 }
315
316 /*
317  * TODO - have cparent use a shared lock normally instead of exclusive,
318  *        (needs to be upgraded for slave adjustments).
319  */
320 static
321 int
322 hammer2_sync_slaves(hammer2_syncthr_t *thr, hammer2_cluster_t *cparent,
323                     int *errors)
324 {
325         hammer2_pfs_t *pmp;
326         hammer2_cluster_t *cluster;
327         hammer2_cluster_t *scluster;
328         hammer2_chain_t *focus;
329         hammer2_chain_t *chain;
330         hammer2_key_t key_next;
331         int error;
332         int nerror;
333         int idx;
334         int n;
335         int nowork;
336         int dorecursion;
337
338         pmp = thr->pmp;
339         idx = thr->clindex;     /* cluster node we are responsible for */
340
341         /*
342          * Nothing to do if all slaves are synchronized.
343          * Nothing to do if cluster not authoritatively readable.
344          */
345         if (pmp->flags & HAMMER2_CLUSTER_SSYNCED)
346                 return(0);
347         if ((pmp->flags & HAMMER2_CLUSTER_RDHARD) == 0)
348                 return(HAMMER2_ERROR_INCOMPLETE);
349
350         error = 0;
351
352         /*
353          * XXX snapshot the source to provide a stable source to copy.
354          */
355
356         /*
357          * Update all local slaves (remote slaves are handled by the sync
358          * threads on their respective hosts).
359          *
360          * Do a full topology scan, insert/delete elements on slaves as
361          * needed.  cparent must be ref'd so we can unlock and relock it
362          * on the recursion.
363          *
364          * ALLNODES - Allows clusters with a NULL focus to be returned if
365          *            elements remain on other nodes.
366          */
367         hammer2_cluster_ref(cparent);
368         cluster = hammer2_cluster_lookup(cparent, &key_next,
369                                          HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
370                                          HAMMER2_LOOKUP_NODATA |
371                                          HAMMER2_LOOKUP_NOLOCK |
372                                          HAMMER2_LOOKUP_NODIRECT |
373                                          HAMMER2_LOOKUP_ALLNODES);
374         dumpcluster("lookup", cparent, cluster);
375
376         /*
377          * Scan elements
378          */
379         while (cluster) {
380                 /*
381                  * nowork is adjusted during the loop,
382                  * dorecursion is calculated here.
383                  */
384                 nowork = 1;
385                 focus = cluster->focus;
386                 if (focus && focus->bref.type == HAMMER2_BREF_TYPE_INODE)
387                         dorecursion = 1;
388                 else
389                         dorecursion = 0;
390
391                 if (idx == 3 && (hammer2_debug & 1) && focus)
392                         kprintf("scan3 focus %d.%016jx %d.%016jx\n",
393                             (cparent ? cparent->focus->bref.type : 0xFF),
394                             (cparent ? cparent->focus->bref.key : (uintmax_t)-1LLU),
395                             focus->bref.type, focus->bref.key);
396 repeat1:
397                 /*
398                  * Synchronize chains to focus
399                  */
400                 if (idx >= cluster->nchains)
401                         goto skip1;
402                 chain = cluster->array[idx].chain;
403                 if (idx == 3 && (hammer2_debug & 1) && chain)
404                         kprintf("scan3 slave %d.%016jx %d.%016jx\n",
405                             ((cparent && cparent->array[idx].chain) ? cparent->array[idx].chain->bref.type : 0xFF),
406                             ((cparent && cparent->array[idx].chain) ? cparent->array[idx].chain->bref.key : (uintmax_t)-1LLU),
407                             cluster->array[idx].chain->bref.type,
408                             cluster->array[idx].chain->bref.key);
409                 if (idx == 3 && (hammer2_debug & 1) && chain == NULL)
410                         kprintf("scan3 slave %d.%16jx NULL\n",
411                             ((cparent && cparent->array[idx].chain) ? cparent->array[idx].chain->bref.type : 0xFF),
412                             ((cparent && cparent->array[idx].chain) ? cparent->array[idx].chain->bref.key : (uintmax_t)-1LLU)
413                         );
414
415                 /*
416                  * Disable recursion for this index and loop up
417                  * if a chain error is detected.
418                  *
419                  * A NULL chain is ok, it simply indicates that
420                  * the slave reached the end of its scan, but we
421                  * might have stuff from the master that still
422                  * needs to be copied in.
423                  */
424                 if (chain && chain->error) {
425                         kprintf("chain error index %d: %d\n",
426                                 idx, chain->error);
427                         errors[idx] = chain->error;
428                         error = chain->error;
429                         cluster->array[idx].flags |= HAMMER2_CITEM_INVALID;
430                         goto skip1;
431                 }
432
433                 /*
434                  * Skip if the slave already has the record (everything
435                  * matches including the modify_tid).  Note that the
436                  * mirror_tid does not have to match, mirror_tid is
437                  * a per-block-device entity.
438                  */
439                 if (chain &&
440                     (cluster->array[idx].flags & HAMMER2_CITEM_INVALID) == 0) {
441                         goto skip1;
442                 }
443
444                 /*
445                  * Invalid element needs to be updated.
446                  */
447                 nowork = 0;
448
449                 /*
450                  * Otherwise adjust the slave.  Compare the focus to
451                  * the chain.  Note that focus and chain can
452                  * independently be NULL.
453                  */
454                 KKASSERT(cluster->focus == focus);
455                 if (focus) {
456                         if (chain)
457                                 n = hammer2_chain_cmp(focus, chain);
458                         else
459                                 n = -1; /* end-of-scan on slave */
460                 } else {
461                         if (chain)
462                                 n = 1;  /* end-of-scan on focus */
463                         else
464                                 n = 0;  /* end-of-scan on both */
465                 }
466
467                 if (n < 0) {
468                         /*
469                          * slave chain missing, create missing chain.
470                          *
471                          * If we are going to recurse we have to set
472                          * the initial modify_tid to 0 until the
473                          * sub-tree is completely synchronized.
474                          * Setting (n = 0) in this situation forces
475                          * the replacement call to run on the way
476                          * back up after the sub-tree has
477                          * synchronized.
478                          */
479                         if (dorecursion) {
480                                 nerror = hammer2_sync_insert(
481                                                 thr, cparent, cluster,
482                                                 0,
483                                                 idx, errors);
484                                 if (nerror == 0)
485                                         n = 0;
486                         } else {
487                                 nerror = hammer2_sync_insert(
488                                                 thr, cparent, cluster,
489                                                 focus->bref.modify_tid,
490                                                 idx, errors);
491                         }
492                 } else if (n > 0) {
493                         /*
494                          * excess slave chain, destroy
495                          */
496                         nerror = hammer2_sync_destroy(thr,
497                                                       cparent, cluster,
498                                                       idx, errors);
499                         hammer2_cluster_next_single_chain(
500                                 cparent, cluster,
501                                 &key_next,
502                                 HAMMER2_KEY_MIN,
503                                 HAMMER2_KEY_MAX,
504                                 idx,
505                                 HAMMER2_LOOKUP_NODATA |
506                                 HAMMER2_LOOKUP_NOLOCK |
507                                 HAMMER2_LOOKUP_NODIRECT |
508                                 HAMMER2_LOOKUP_ALLNODES);
509                         /*
510                          * Re-execute same index, there might be more
511                          * items to delete before this slave catches
512                          * up to the focus.
513                          */
514                         goto repeat1;
515                 } else {
516                         /*
517                          * Key matched but INVALID was set which likely
518                          * means that modify_tid is out of sync.
519                          *
520                          * If we are going to recurse we have to do
521                          * a partial replacement of the parent to
522                          * ensure that the block array is compatible.
523                          * For example, the current slave inode might
524                          * be flagged DIRECTDATA when the focus is not.
525                          * We must set modify_tid to 0 for now and
526                          * will fix it when recursion is complete.
527                          *
528                          * If we are not going to recurse we can do
529                          * a normal replacement.
530                          *
531                          * focus && chain can both be NULL on a match.
532                          */
533                         if (dorecursion) {
534                                 nerror = hammer2_sync_replace(
535                                                 thr, cparent, cluster,
536                                                 0,
537                                                 idx, errors);
538                         } else if (focus) {
539                                 nerror = hammer2_sync_replace(
540                                                 thr, cparent, cluster,
541                                                 focus->bref.modify_tid,
542                                                 idx, errors);
543                         } else {
544                                 nerror = 0;
545                         }
546                 }
547                 if (nerror)
548                         error = nerror;
549                 /* finished primary synchronization of chains */
550
551 skip1:
552 #if 0
553                 /*
554                  * Operation may have modified cparent, we must replace
555                  * iroot->cluster if we are at the top level.
556                  */
557                 if (thr->depth == 0)
558                         hammer2_inode_repoint_one(pmp->iroot, cparent, idx);
559 #endif
560                 KKASSERT(cluster->focus == focus);
561
562                 /*
563                  * If no work to do this iteration, skip any recursion.
564                  */
565                 if (nowork)
566                         goto skip2;
567
568                 /*
569                  * EXECUTE RECURSION (skip if no recursion)
570                  *
571                  * Indirect blocks are absorbed by the iteration so we only
572                  * have to recurse on inodes.
573                  *
574                  * Do not resolve scluster, it represents the iteration
575                  * parent and while it is logically in-sync the physical
576                  * elements might not match due to the presence of indirect
577                  * blocks and such.
578                  */
579                 if (dorecursion == 0)
580                         goto skip2;
581                 if (thr->depth > 20) {
582                         kprintf("depth limit reached\n");
583                         nerror = HAMMER2_ERROR_DEPTH;
584                 } else {
585                         hammer2_cluster_unlock(cparent);
586                         scluster = hammer2_cluster_copy(cluster);
587                         hammer2_cluster_lock(scluster, HAMMER2_RESOLVE_ALWAYS);
588                         ++thr->depth;
589                         nerror = hammer2_sync_slaves(thr, scluster, errors);
590                         --thr->depth;
591                         hammer2_cluster_unlock(scluster);
592                         hammer2_cluster_drop(scluster);
593                         /* XXX modify_tid on scluster */
594                         /* flush needs to not update modify_tid */
595                         hammer2_cluster_lock(cparent, HAMMER2_RESOLVE_ALWAYS);
596                 }
597                 if (nerror)
598                         goto skip2;
599
600                 /*
601                  * Fixup parent nodes on the way back up from the recursion
602                  * if no error occurred.  The modify_tid for these nodes
603                  * would have been set to 0 and must be set to their final
604                  * value.
605                  */
606                 chain = cluster->array[idx].chain;
607                 if (chain == NULL || chain->error)
608                         goto skip2;
609                 /*
610                  * should not be set but must fixup parents.
611                 if ((cluster->array[idx].flags & HAMMER2_CITEM_INVALID) == 0)
612                         goto skip2;
613                 */
614
615                 /*
616                  * At this point we have to have key-matched non-NULL
617                  * elements.
618                  */
619                 n = hammer2_chain_cmp(focus, chain);
620                 if (n != 0) {
621                         kprintf("hammer2_sync_slaves: illegal "
622                                 "post-recursion state %d\n", n);
623                         goto skip2;
624                 }
625
626                 /*
627                  * Update modify_tid on the way back up.
628                  */
629                 nerror = hammer2_sync_replace(
630                                 thr, cparent, cluster,
631                                 focus->bref.modify_tid,
632                                 idx, errors);
633                 if (nerror)
634                         error = nerror;
635
636 #if 0
637                 /*
638                  * Operation may modify cparent, must replace
639                  * iroot->cluster if we are at the top level.
640                  */
641                 if (thr->depth == 0)
642                         hammer2_inode_repoint_one(pmp->iroot, cparent, idx);
643 #endif
644
645 skip2:
646                 /*
647                  * Iterate.
648                  */
649                 dumpcluster("adjust", cparent, cluster);
650                 cluster = hammer2_cluster_next(cparent, cluster,
651                                                &key_next,
652                                                HAMMER2_KEY_MIN,
653                                                HAMMER2_KEY_MAX,
654                                                HAMMER2_LOOKUP_NODATA |
655                                                HAMMER2_LOOKUP_NOLOCK |
656                                                HAMMER2_LOOKUP_NODIRECT |
657                                                HAMMER2_LOOKUP_ALLNODES);
658                 dumpcluster("nextcl", cparent, cluster);
659         }
660         hammer2_cluster_drop(cparent);
661         if (cluster)
662                 hammer2_cluster_drop(cluster);
663
664         return error;
665 }
666
667 /*
668  * cparent is locked exclusively, with an extra ref, cluster is not locked.
669  */
670 static
671 int
672 hammer2_sync_insert(hammer2_syncthr_t *thr,
673                     hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
674                     hammer2_tid_t modify_tid, int i, int *errors)
675 {
676         hammer2_chain_t *focus;
677         hammer2_chain_t *chain;
678         hammer2_key_t dummy;
679
680         focus = cluster->focus;
681 #if HAMMER2_SYNCTHR_DEBUG
682         if (hammer2_debug & 1)
683         kprintf("insert rec par=%p/%d.%016jx slave %d %d.%016jx mod=%016jx\n",
684                 cparent->array[i].chain, 
685                 cparent->array[i].chain->bref.type,
686                 cparent->array[i].chain->bref.key,
687                 i, focus->bref.type, focus->bref.key, modify_tid);
688 #endif
689
690         /*
691          * We have to do a lookup to position ourselves at the correct
692          * parent when inserting a record into a new slave because the
693          * cluster iteration for this slave might not be pointing to the
694          * right place.  Our expectation is that the record will not be
695          * found.
696          */
697         hammer2_cluster_unlock_except(cparent, i);
698         chain = hammer2_chain_lookup(&cparent->array[i].chain, &dummy,
699                                      focus->bref.key, focus->bref.key,
700                                      &cparent->array[i].cache_index,
701                                      HAMMER2_LOOKUP_NODIRECT);
702         if (cparent->focus_index == i)
703                 cparent->focus = cparent->array[i].chain;
704         KKASSERT(chain == NULL);
705
706         /*
707          * Create the missing chain.
708          *
709          * Have to be careful to avoid deadlocks.
710          */
711         chain = NULL;
712         if (cluster->focus_index < i)
713                 hammer2_chain_lock(focus, HAMMER2_RESOLVE_ALWAYS);
714         hammer2_chain_create(&thr->trans, &cparent->array[i].chain,
715                              &chain, thr->pmp,
716                              focus->bref.key, focus->bref.keybits,
717                              focus->bref.type, focus->bytes,
718                              0);
719         if (cluster->focus_index > i)
720                 hammer2_chain_lock(focus, HAMMER2_RESOLVE_ALWAYS);
721         if (cparent->focus_index == i)
722                 cparent->focus = cparent->array[i].chain;
723         hammer2_chain_modify(&thr->trans, chain, 0);
724
725         /*
726          * Copy focus to new chain
727          */
728
729         /* type already set */
730         chain->bref.methods = focus->bref.methods;
731         /* keybits already set */
732         chain->bref.vradix = focus->bref.vradix;
733         /* mirror_tid set by flush */
734         chain->bref.modify_tid = modify_tid;
735         chain->bref.flags = focus->bref.flags;
736         /* key already present */
737         /* check code will be recalculated */
738
739         /*
740          * Copy data body.
741          */
742         switch(chain->bref.type) {
743         case HAMMER2_BREF_TYPE_INODE:
744                 if ((focus->data->ipdata.op_flags &
745                      HAMMER2_OPFLAG_DIRECTDATA) == 0) {
746                         bcopy(focus->data, chain->data,
747                               offsetof(hammer2_inode_data_t, u));
748                         break;
749                 }
750                 /* fall through */
751         case HAMMER2_BREF_TYPE_DATA:
752                 bcopy(focus->data, chain->data, chain->bytes);
753                 hammer2_chain_setcheck(chain, chain->data);
754                 break;
755         default:
756                 KKASSERT(0);
757                 break;
758         }
759
760         hammer2_chain_unlock(focus);
761         hammer2_chain_unlock(chain);            /* unlock, leave ref */
762
763         /*
764          * Avoid ordering deadlock when relocking cparent.
765          */
766         if (i == 0) {
767                 hammer2_cluster_lock_except(cparent, i, HAMMER2_RESOLVE_ALWAYS);
768         } else {
769                 hammer2_chain_unlock(cparent->array[i].chain);
770                 hammer2_cluster_lock(cparent, HAMMER2_RESOLVE_ALWAYS);
771         }
772
773         /*
774          * Enter item into (unlocked) cluster.
775          *
776          * Must clear invalid for iteration to work properly.
777          */
778         if (cluster->array[i].chain)
779                 hammer2_chain_drop(cluster->array[i].chain);
780         cluster->array[i].chain = chain;
781         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
782
783         return 0;
784 }
785
786 /*
787  * cparent is locked exclusively, with an extra ref, cluster is not locked.
788  */
789 static
790 int
791 hammer2_sync_destroy(hammer2_syncthr_t *thr,
792                      hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
793                      int i, int *errors)
794 {
795         hammer2_chain_t *chain;
796
797         chain = cluster->array[i].chain;
798 #if HAMMER2_SYNCTHR_DEBUG
799         if (hammer2_debug & 1)
800         kprintf("destroy rec %p/%p slave %d %d.%016jx\n",
801                 cparent, cluster,
802                 i, chain->bref.type, chain->bref.key);
803 #endif
804         /*
805          * Try to avoid unnecessary I/O.
806          *
807          * XXX accounting not propagated up properly.  We might have to do
808          *     a RESOLVE_MAYBE here and pass 0 for the flags.
809          */
810         hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);
811         hammer2_chain_delete(&thr->trans, cparent->array[i].chain, chain,
812                              HAMMER2_DELETE_NOSTATS |
813                              HAMMER2_DELETE_PERMANENT);
814         hammer2_chain_unlock(chain);
815
816         /*
817          * The element is not valid in that it doesn't match the other
818          * elements, but we have to mark it valid here to allow the
819          * cluster_next() call to advance this index to the next element.
820          */
821         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
822
823         return 0;
824 }
825
826 /*
827  * cparent is locked exclusively, with an extra ref, cluster is not locked.
828  * Replace element [i] in the cluster.
829  */
830 static
831 int
832 hammer2_sync_replace(hammer2_syncthr_t *thr,
833                      hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
834                      hammer2_tid_t modify_tid, int i, int *errors)
835 {
836         hammer2_chain_t *focus;
837         hammer2_chain_t *chain;
838         int nradix;
839         uint8_t otype;
840
841         focus = cluster->focus;
842         chain = cluster->array[i].chain;
843 #if HAMMER2_SYNCTHR_DEBUG
844         if (hammer2_debug & 1)
845         kprintf("replace rec %p/%p slave %d %d.%016jx mod=%016jx\n",
846                 cparent, cluster,
847                 i, focus->bref.type, focus->bref.key, modify_tid);
848 #endif
849         if (cluster->focus_index < i)
850                 hammer2_chain_lock(focus, HAMMER2_RESOLVE_ALWAYS);
851         hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
852         if (cluster->focus_index >= i)
853                 hammer2_chain_lock(focus, HAMMER2_RESOLVE_ALWAYS);
854         if (chain->bytes != focus->bytes) {
855                 /* XXX what if compressed? */
856                 nradix = hammer2_getradix(chain->bytes);
857                 hammer2_chain_resize(&thr->trans, NULL,
858                                      cparent->array[i].chain, chain,
859                                      nradix, 0);
860         }
861         hammer2_chain_modify(&thr->trans, chain, 0);
862         otype = chain->bref.type;
863         chain->bref.type = focus->bref.type;
864         chain->bref.methods = focus->bref.methods;
865         chain->bref.keybits = focus->bref.keybits;
866         chain->bref.vradix = focus->bref.vradix;
867         /* mirror_tid updated by flush */
868         chain->bref.modify_tid = modify_tid;
869         chain->bref.flags = focus->bref.flags;
870         /* key already present */
871         /* check code will be recalculated */
872         chain->error = 0;
873
874         /*
875          * Copy data body.
876          */
877         switch(chain->bref.type) {
878         case HAMMER2_BREF_TYPE_INODE:
879                 if ((focus->data->ipdata.op_flags &
880                      HAMMER2_OPFLAG_DIRECTDATA) == 0) {
881                         /*
882                          * If DIRECTDATA is transitioning to 0 or the old
883                          * chain is not an inode we have to initialize
884                          * the block table.
885                          */
886                         if (otype != HAMMER2_BREF_TYPE_INODE ||
887                             (chain->data->ipdata.op_flags &
888                              HAMMER2_OPFLAG_DIRECTDATA)) {
889                                 kprintf("chain inode transiiton away from dd\n");
890                                 bzero(&chain->data->ipdata.u,
891                                       sizeof(chain->data->ipdata.u));
892                         }
893                         bcopy(focus->data, chain->data,
894                               offsetof(hammer2_inode_data_t, u));
895                         /* XXX setcheck on inode should not be needed */
896                         hammer2_chain_setcheck(chain, chain->data);
897                         break;
898                 }
899                 /* fall through */
900         case HAMMER2_BREF_TYPE_DATA:
901                 bcopy(focus->data, chain->data, chain->bytes);
902                 hammer2_chain_setcheck(chain, chain->data);
903                 break;
904         default:
905                 KKASSERT(0);
906                 break;
907         }
908
909         hammer2_chain_unlock(focus);
910         hammer2_chain_unlock(chain);
911
912         /*
913          * Must clear invalid for iteration to work properly.
914          */
915         cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
916
917         return 0;
918 }