1f3334b5ce702117f3038db998c5192bca1ce059
[dragonfly.git] / sys / vfs / hammer2 / hammer2_chain.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * This subsystem handles direct and indirect block searches, recursions,
37  * creation, and deletion.  Chains of blockrefs are tracked and modifications
38  * are flag for propagation... eventually all the way back to the volume
39  * header.
40  */
41
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/lock.h>
48 #include <sys/uuid.h>
49
50 #include "hammer2.h"
51
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
53
54 static int hammer2_indirect_optimize;   /* XXX SYSCTL */
55
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57                         hammer2_mount_t *hmp, hammer2_chain_t *parent,
58                         hammer2_key_t key, int keybits);
59
60 /*
61  * Compare function for chain splay tree
62  */
63 int
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
65 {
66         return(chain2->index - chain1->index);
67 }
68
69 /*
70  * Allocate a new disconnected chain element representing the specified
71  * bref.  The chain element is locked exclusively and refs is set to 1.
72  *
73  * This essentially allocates a system memory structure representing one
74  * of the media structure types, including inodes.
75  */
76 hammer2_chain_t *
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
78 {
79         hammer2_chain_t *chain;
80         hammer2_inode_t *ip;
81         hammer2_indblock_t *np;
82         hammer2_data_t *dp;
83
84         /*
85          * Construct the appropriate system structure.
86          */
87         switch(bref->type) {
88         case HAMMER2_BREF_TYPE_INODE:
89                 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
90                 chain = &ip->chain;
91                 chain->u.ip = ip;
92                 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
93                 ip->hmp = hmp;
94                 break;
95         case HAMMER2_BREF_TYPE_INDIRECT:
96                 np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO);
97                 chain = &np->chain;
98                 chain->u.np = np;
99                 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
100                 break;
101         case HAMMER2_BREF_TYPE_DATA:
102                 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
103                 chain = &dp->chain;
104                 chain->u.dp = dp;
105                 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
106                 break;
107         case HAMMER2_BREF_TYPE_VOLUME:
108                 chain = NULL;
109                 panic("hammer2_chain_get: volume type illegal for op");
110         default:
111                 chain = NULL;
112                 panic("hammer2_chain_get: unrecognized blockref type: %d",
113                       bref->type);
114         }
115         chain->bref = *bref;
116         chain->index = -1;      /* not yet assigned */
117
118         chain->refs = 1;
119         lockmgr(&chain->lk, LK_EXCLUSIVE);
120
121         return (chain);
122 }
123
124 /*
125  * Free a disconnected chain element
126  */
127 void
128 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
129 {
130         void *mem;
131
132         KKASSERT(chain->bp == NULL);
133         KKASSERT(chain->data == NULL);
134         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
135                  chain->u.ip->vp == NULL);
136
137         if ((mem = chain->u.mem) != NULL) {
138                 chain->u.mem = NULL;
139                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
140                         kfree(mem, hmp->minode);
141                 else
142                         kfree(mem, hmp->mchain);
143         }
144 }
145
146 /*
147  * Add a reference to a chain element (for shared access).  The chain
148  * element must already have at least 1 ref controlled by the caller.
149  */
150 void
151 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
152 {
153         KKASSERT(chain->refs > 0);
154         atomic_add_int(&chain->refs, 1);
155 }
156
157 /*
158  * Drop the callers reference to the chain element.  If the ref count
159  * reaches zero the chain element and its related structure (typically an
160  * inode or indirect block) will be freed and the parent will be
161  * recursively dropped.
162  *
163  * Modified elements hold an additional reference so it should not be
164  * possible for the count on a modified element to drop to 0.
165  *
166  * The chain element must NOT be locked by the caller.
167  *
168  * The parent might or might not be locked by the caller but if so it
169  * will also be referenced so we shouldn't recurse upward.
170  */
171 void
172 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
173 {
174         hammer2_chain_t *parent;
175         u_int refs;
176
177         while (chain) {
178                 refs = chain->refs;
179                 cpu_ccfence();
180                 if (refs == 1) {
181                         KKASSERT(chain != &hmp->vchain);
182                         parent = chain->parent;
183                         lockmgr(&parent->lk, LK_EXCLUSIVE);
184                         if (atomic_cmpset_int(&chain->refs, 1, 0)) {
185                                 /*
186                                  * Succeeded, recurse and drop parent
187                                  */
188                                 SPLAY_REMOVE(hammer2_chain_splay,
189                                              &parent->shead, chain);
190                                 chain->parent = NULL;
191                                 lockmgr(&parent->lk, LK_RELEASE);
192                                 hammer2_chain_free(hmp, chain);
193                                 chain = parent;
194                         } else {
195                                 lockmgr(&parent->lk, LK_RELEASE);
196                         }
197                 } else {
198                         if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
199                                 /*
200                                  * Succeeded, count did not reach zero so
201                                  * cut out of the loop.
202                                  */
203                                 break;
204                         }
205                 }
206         }
207 }
208
209 /*
210  * Lock a chain element, acquiring its data with I/O if necessary.
211  *
212  * Returns 0 on success or an error code if the data could not be acquired.
213  * The chain element is locked either way.
214  *
215  * chain->data will be pointed either at the embedded data (e.g. for
216  * inodes), in which case the buffer cache buffer is released, or will
217  * point into the bp->b_data buffer with the bp left intact while locked.
218  */
219 int
220 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
221 {
222         hammer2_blockref_t *bref;
223         hammer2_off_t off_hi;
224         size_t off_lo;
225         int error;
226         void *data;
227
228         /*
229          * Lock the element.  Under certain conditions this might end up
230          * being a recursive lock.
231          */
232         KKASSERT(chain->refs > 0);
233         lockmgr(&chain->lk, LK_EXCLUSIVE);
234
235         /*
236          * The volume header is a special case
237          */
238         if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
239                 return(0);
240
241         /*
242          * bp must be NULL, so if the data pointer is valid here it points
243          * to embedded data and no I/O is necessary (whether modified or not).
244          */
245         KKASSERT(chain->bp == NULL);
246         if (chain->data)
247                 return (0);
248
249         /*
250          * If data is NULL we must issue I/O.  Any error returns the error
251          * code but leaves the chain locked.
252          *
253          * If the chain was modified a new bref will have already been
254          * allocated and its related bp is probably still sitting in the
255          * buffer cache.
256          */
257         bref = &chain->bref;
258
259         off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
260         off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
261         KKASSERT(off_hi != 0);
262         error = bread(hmp->devvp, off_hi, HAMMER2_PBUFSIZE, &chain->bp);
263
264         if (error) {
265                 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
266                         (intmax_t)off_hi, error);
267                 brelse(chain->bp);
268                 chain->bp = NULL;
269                 return (error);
270         }
271
272         /*
273          * Setup the data pointer, either pointing it to an embedded data
274          * structure and copying the data from the buffer, or pointint it
275          * into the buffer.
276          *
277          * The buffer is not retained when copying to an embedded data
278          * structure in order to avoid potential deadlocks or recursions
279          * on the same physical buffer.
280          */
281         switch (bref->type) {
282         case HAMMER2_BREF_TYPE_VOLUME:
283                 /*
284                  * Copy data from bp to embedded buffer
285                  */
286                 KKASSERT(0);    /* not yet - have mount use this soon */
287                 KKASSERT(off_hi == 0);
288                 bcopy((char *)chain->bp->b_data + off_lo,
289                       &hmp->voldata, HAMMER2_PBUFSIZE);
290                 chain->data = (void *)&hmp->voldata;
291                 brelse(chain->bp);
292                 chain->bp = NULL;
293                 break;
294         case HAMMER2_BREF_TYPE_INODE:
295                 /*
296                  * Copy data from bp to embedded buffer.
297                  */
298                 bcopy((char *)chain->bp->b_data + off_lo,
299                       &chain->u.ip->ip_data,
300                       HAMMER2_INODE_BYTES);
301                 chain->data = (void *)&chain->u.ip->ip_data;
302                 brelse(chain->bp);
303                 chain->bp = NULL;
304                 break;
305         default:
306                 /*
307                  * Leave bp intact
308                  */
309                 data = (char *)chain->bp->b_data + off_lo;
310                 chain->data = data;
311                 break;
312         }
313         return (0);
314 }
315
316 /*
317  * Convert a locked chain that was retrieved read-only to read-write.
318  *
319  * If not already marked modified a new physical block will be allocated
320  * and assigned to the bref.  If the data is pointing into an existing
321  * bp it will be copied to the new bp and the new bp will replace the
322  * existing bp.
323  *
324  * If the data is embedded we allocate the new physical block but don't
325  * bother copying the data into it (yet).
326  */
327 void
328 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
329 {
330         hammer2_chain_t *parent;
331         struct buf *nbp;
332         size_t bytes;
333         void *ndata;
334         int error;
335
336         /*
337          * If the chain is already marked modified we can just return.
338          */
339         if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
340                 KKASSERT(chain->data != NULL);
341                 return;
342         }
343
344         /*
345          * The MODIFIED bit is not yet set, we must allocate the
346          * copy-on-write block.
347          *
348          * If the data is embedded no other action is required.
349          *
350          * If the data is not embedded we acquire and clear the
351          * new block.  If chain->data is not NULL we then do the
352          * copy-on-write.  chain->data will then be repointed to the new
353          * buffer and the old buffer will be released.
354          *
355          * For newly created elements with no prior allocation we go
356          * through the copy-on-write steps except without the copying part.
357          */
358         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
359         hammer2_chain_ref(hmp, chain);  /* ref for modified bit */
360
361         bytes = 1 << (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
362         if (chain != &hmp->vchain) {
363                 chain->bref.data_off = hammer2_freemap_alloc(hmp, bytes);
364                 /* XXX failed allocation */
365         }
366
367         switch(chain->bref.type) {
368         case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
369         case HAMMER2_BREF_TYPE_INODE:           /* embedded */
370                 /*
371                  * data points to embedded structure, no copy needed
372                  */
373                 error = 0;
374                 break;
375         case HAMMER2_BREF_TYPE_INDIRECT:
376         case HAMMER2_BREF_TYPE_DATA:
377                 /*
378                  * data (if not NULL) points into original bp, copy-on-write
379                  * to new block.
380                  */
381                 KKASSERT(chain != &hmp->vchain);        /* safety */
382                 if (bytes == HAMMER2_PBUFSIZE) {
383                         nbp = getblk(hmp->devvp,
384                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
385                                      HAMMER2_PBUFSIZE, 0, 0);
386                         vfs_bio_clrbuf(nbp);
387                         error = 0;
388                 } else {
389                         error = bread(hmp->devvp,
390                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
391                                      HAMMER2_PBUFSIZE, &nbp);
392                         KKASSERT(error == 0);/* XXX handle error */
393                 }
394                 ndata = nbp->b_data + (chain->bref.data_off &
395                                        HAMMER2_OFF_MASK_LO);
396                 if (chain->data) {
397                         bcopy(chain->data, ndata, bytes);
398                         KKASSERT(chain->bp != NULL);
399                         brelse(chain->bp);
400                 }
401                 chain->bp = nbp;
402                 chain->data = ndata;
403                 break;
404         default:
405                 panic("hammer2_chain_modify: unknown bref type");
406                 break;
407
408         }
409
410         /*
411          * Recursively mark the parent chain elements so flushes can find
412          * modified elements.
413          */
414         parent = chain->parent;
415         while (parent && (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
416                 atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
417                 parent = parent->parent;
418         }
419 }
420
421 /*
422  * Unlock a chain element without dropping its reference count.
423  * (see hammer2_chain_put() to do both).
424  *
425  * Non-embedded data references (chain->bp != NULL) are returned to the
426  * system and the data field is cleared in that case.  If modified the
427  * dirty buffer is still returned to the system, can be flushed to disk by
428  * the system at any time, and will be reconstituted/re-read as needed.
429  */
430 void
431 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
432 {
433         if (chain->bp) {
434                 chain->data = NULL;
435                 if (chain->flags & (HAMMER2_CHAIN_MODIFIED |
436                                     HAMMER2_CHAIN_FLUSHED))
437                         bdwrite(chain->bp);
438                 else
439                         bqrelse(chain->bp);
440                 chain->bp = NULL;
441         }
442         lockmgr(&chain->lk, LK_RELEASE);
443 }
444
445 /*
446  * Locate an in-memory chain.  The parent must be locked.  The in-memory
447  * chain is returned or NULL if no in-memory chain is present.
448  *
449  * NOTE: A chain on-media might exist for this index when NULL is returned.
450  */
451 hammer2_chain_t *
452 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
453 {
454         hammer2_chain_t dummy;
455         hammer2_chain_t *chain;
456
457         dummy.index = index;
458         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
459         return (chain);
460 }
461
462 /*
463  * Return a locked chain structure with all associated data acquired.
464  *
465  * Caller must lock the parent on call, the returned child will be locked.
466  */
467 hammer2_chain_t *
468 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
469                   int index, int flags)
470 {
471         hammer2_blockref_t *bref;
472         hammer2_chain_t *chain;
473         hammer2_chain_t dummy;
474
475         /*
476          * First see if we have a (possibly modified) chain element cached
477          * for this (parent, index).  Acquire the data if necessary.
478          *
479          * If chain->data is non-NULL the chain should already be marked
480          * modified.
481          */
482         dummy.index = index;
483         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
484         if (chain) {
485                 hammer2_chain_ref(hmp, chain);
486                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
487                         hammer2_chain_lock(hmp, chain);
488                 return(chain);
489         }
490
491         /*
492          * Otherwise lookup the bref and issue I/O (switch on the parent)
493          */
494         switch(parent->bref.type) {
495         case HAMMER2_BREF_TYPE_INODE:
496                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
497                 bref = &parent->data->ipdata.u.blockset.blockref[index];
498                 break;
499         case HAMMER2_BREF_TYPE_INDIRECT:
500                 KKASSERT(index >= 0 && index < HAMMER2_IND_COUNT);
501                 bref = &parent->data->npdata.blockref[index];
502                 break;
503         case HAMMER2_BREF_TYPE_VOLUME:
504                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
505                 bref = &hmp->voldata.sroot_blockset.blockref[index];
506                 break;
507         default:
508                 bref = NULL;
509                 panic("hammer2_chain_get: unrecognized blockref type: %d",
510                       parent->bref.type);
511         }
512         chain = hammer2_chain_alloc(hmp, bref);
513
514         /*
515          * Link the chain into its parent.  Caller is expected to hold an
516          * exclusive lock on the parent.
517          */
518         chain->parent = parent;
519         chain->index = index;
520         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
521                 panic("hammer2_chain_link: collision");
522         KKASSERT(parent->refs > 1);
523         atomic_add_int(&parent->refs, 1);       /* for splay entry */
524
525         /*
526          * Additional linkage for inodes.  Reuse the parent pointer to
527          * find the parent directory.
528          */
529         if (bref->type == HAMMER2_BREF_TYPE_INODE) {
530                 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
531                         parent = parent->parent;
532                 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
533                         chain->u.ip->pip = parent->u.ip;
534         }
535
536         /*
537          * Our new chain structure has already been referenced and locked
538          * but the lock code handles the I/O so call it to resolve the data.
539          * Then release one of our two exclusive locks.
540          *
541          * If NOLOCK is set the release will release the one-and-only lock.
542          */
543         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
544                 hammer2_chain_lock(hmp, chain);
545         lockmgr(&chain->lk, LK_RELEASE);
546
547         return (chain);
548 }
549
550 /*
551  * Unlock and dereference a chain after use.  It is possible for this to
552  * recurse up the chain.
553  */
554 void
555 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
556 {
557         hammer2_chain_unlock(hmp, chain);
558         hammer2_chain_drop(hmp, chain);
559 }
560
561 /*
562  * Locate any key between key_beg and key_end inclusive.  (*parentp)
563  * typically points to an inode but can also point to a related indirect
564  * block and this function will recurse upwards and find the inode again.
565  *
566  * (*parentp) must be exclusively locked and referenced and can be an inode
567  * or an existing indirect block within the inode.
568  *
569  * On return (*parentp) will be modified to point at the deepest parent chain
570  * element encountered during the search, as a helper for an insertion or
571  * deletion.   The new (*parentp) will be locked and referenced and the old
572  * will be unlocked and dereferenced (no change if they are both the same).
573  *
574  * The matching chain will be returned exclusively locked and referenced.
575  *
576  * NULL is returned if no match was found, but (*parentp) will still
577  * potentially be adjusted.
578  *
579  * This function will also recurse up the chain if the key is not within the
580  * current parent's range.  (*parentp) can never be set to NULL.  An iteration
581  * can simply allow (*parentp) to float inside the loop.
582  */
583 hammer2_chain_t *
584 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
585                      hammer2_key_t key_beg, hammer2_key_t key_end,
586                      int flags)
587 {
588         hammer2_chain_t *parent;
589         hammer2_chain_t *chain;
590         hammer2_chain_t *tmp;
591         hammer2_blockref_t *base;
592         hammer2_blockref_t *bref;
593         hammer2_key_t scan_beg;
594         hammer2_key_t scan_end;
595         int count = 0;
596         int i;
597
598         /*
599          * Recurse (*parentp) upward if necessary until the parent completely
600          * encloses the key range or we hit the inode.
601          */
602         parent = *parentp;
603         while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
604                 scan_beg = parent->bref.key;
605                 scan_end = scan_beg +
606                            ((hammer2_key_t)1 << parent->bref.keybits) - 1;
607                 if (key_beg >= scan_beg && key_end <= scan_end)
608                         break;
609                 hammer2_chain_unlock(hmp, parent);
610                 parent = parent->parent;
611                 hammer2_chain_ref(hmp, parent);         /* ref new parent */
612                 hammer2_chain_lock(hmp, parent);        /* lock new parent */
613                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
614                 *parentp = parent;                      /* new parent */
615         }
616
617 again:
618         /*
619          * Locate the blockref array.  Currently we do a fully associative
620          * search through the array.
621          */
622         switch(parent->bref.type) {
623         case HAMMER2_BREF_TYPE_INODE:
624                 base = &parent->data->ipdata.u.blockset.blockref[0];
625                 count = HAMMER2_SET_COUNT;
626                 break;
627         case HAMMER2_BREF_TYPE_INDIRECT:
628                 if (parent->data == NULL)
629                         panic("parent->data is NULL");
630                 base = &parent->data->npdata.blockref[0];
631                 count = HAMMER2_IND_COUNT;
632                 break;
633         case HAMMER2_BREF_TYPE_VOLUME:
634                 base = &hmp->voldata.sroot_blockset.blockref[0];
635                 count = HAMMER2_SET_COUNT;
636                 break;
637         default:
638                 panic("hammer2_chain_push: unrecognized blockref type: %d",
639                       parent->bref.type);
640                 base = NULL;    /* safety */
641                 count = 0;      /* safety */
642         }
643
644         /*
645          * If the element and key overlap we use the element.
646          */
647         bref = NULL;
648         for (i = 0; i < count; ++i) {
649                 tmp = hammer2_chain_find(hmp, parent, i);
650                 bref = (tmp) ? &tmp->bref : &base[i];
651                 if (bref->type == 0)
652                         continue;
653                 scan_beg = bref->key;
654                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
655                 if (key_beg <= scan_end && key_end >= scan_beg)
656                         break;
657         }
658         if (i == count) {
659                 if (key_beg == key_end)
660                         return (NULL);
661                 return (hammer2_chain_next(hmp, parentp, NULL,
662                                            key_beg, key_end, flags));
663         }
664
665         /*
666          * Acquire the new chain element.  If the chain element is an
667          * indirect block we must search recursively.
668          */
669         chain = hammer2_chain_get(hmp, parent, i, flags);
670         if (chain == NULL)
671                 return (NULL);
672
673         /*
674          * If the chain element is an indirect block it becomes the new
675          * parent and we loop on it.  We must fixup the chain we loop on
676          * if the caller passed flags to us that aren't sufficient for our
677          * needs.
678          */
679         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
680                 hammer2_chain_put(hmp, parent);
681                 *parentp = parent = chain;
682                 if (flags & HAMMER2_LOOKUP_NOLOCK)
683                         hammer2_chain_lock(hmp, chain);
684                 goto again;
685         }
686
687         /*
688          * All done, return chain
689          */
690         return (chain);
691 }
692
693 /*
694  * After having issued a lookup we can iterate all matching keys.
695  *
696  * If chain is non-NULL we continue the iteration from just after it's index.
697  *
698  * If chain is NULL we assume the parent was exhausted and continue the
699  * iteration at the next parent.
700  */
701 hammer2_chain_t *
702 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
703                    hammer2_chain_t *chain,
704                    hammer2_key_t key_beg, hammer2_key_t key_end,
705                    int flags)
706 {
707         hammer2_chain_t *parent;
708         hammer2_chain_t *tmp;
709         hammer2_blockref_t *base;
710         hammer2_blockref_t *bref;
711         hammer2_key_t scan_beg;
712         hammer2_key_t scan_end;
713         int i;
714         int count;
715
716         parent = *parentp;
717
718 again:
719         /*
720          * Calculate the next index and recalculate the parent if necessary.
721          */
722         if (chain) {
723                 /*
724                  * Continue iteration within current parent
725                  */
726                 i = chain->index + 1;
727                 hammer2_chain_put(hmp, chain);
728                 chain = NULL;
729         } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
730                 /*
731                  * We reached the end of the iteration.
732                  */
733                 return (NULL);
734         } else {
735                 /*
736                  * Continue iteration with next parent
737                  */
738                 hammer2_chain_t *nparent;
739
740                 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
741                         return (NULL);
742                 i = parent->index + 1;
743                 nparent = parent->parent;
744                 hammer2_chain_ref(hmp, nparent);        /* ref new parent */
745                 hammer2_chain_unlock(hmp, parent);
746                 hammer2_chain_lock(hmp, nparent);       /* lock new parent */
747                 hammer2_chain_drop(hmp, parent);        /* drop old parent */
748                 *parentp = parent = nparent;
749         }
750
751 again2:
752         /*
753          * Locate the blockref array.  Currently we do a fully associative
754          * search through the array.
755          */
756         switch(parent->bref.type) {
757         case HAMMER2_BREF_TYPE_INODE:
758                 base = &parent->data->ipdata.u.blockset.blockref[0];
759                 count = HAMMER2_SET_COUNT;
760                 break;
761         case HAMMER2_BREF_TYPE_INDIRECT:
762                 base = &parent->data->npdata.blockref[0];
763                 count = HAMMER2_IND_COUNT;
764                 break;
765         case HAMMER2_BREF_TYPE_VOLUME:
766                 base = &hmp->voldata.sroot_blockset.blockref[0];
767                 count = HAMMER2_SET_COUNT;
768                 break;
769         default:
770                 panic("hammer2_chain_push: unrecognized blockref type: %d",
771                       parent->bref.type);
772                 base = NULL;    /* safety */
773                 count = 0;      /* safety */
774                 break;
775         }
776         KKASSERT(i <= count);
777
778         /*
779          * Look for the key.  If we are unable to find a match and an exact
780          * match was requested we return NULL.  If a range was requested we
781          * run hammer2_chain_next() to iterate.
782          */
783         bref = NULL;
784         while (i < count) {
785                 tmp = hammer2_chain_find(hmp, parent, i);
786                 bref = (tmp) ? &tmp->bref : &base[i];
787                 if (bref->type == 0) {
788                         ++i;
789                         continue;
790                 }
791 #if 0
792                 kprintf("nextxx(%016jx,%d) %d: %016jx/%d\n",
793                         parent->bref.data_off, i,
794                         bref->type,bref->key, bref->keybits);
795 #endif
796                 scan_beg = bref->key;
797                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
798                 if (key_beg <= scan_end && key_end >= scan_beg)
799                         break;
800                 ++i;
801         }
802
803         /*
804          * If we couldn't find a match recurse up a parent to continue the
805          * search.
806          */
807         if (i == count)
808                 goto again;
809
810         /*
811          * Acquire the new chain element.  If the chain element is an
812          * indirect block we must search recursively.
813          */
814         chain = hammer2_chain_get(hmp, parent, i, flags);
815         if (chain == NULL)
816                 return (NULL);
817
818         /*
819          * If the chain element is an indirect block it becomes the new
820          * parent and we loop on it.
821          */
822         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
823                 hammer2_chain_put(hmp, parent);
824                 *parentp = parent = chain;
825                 i = 0;
826                 goto again2;
827         }
828
829         /*
830          * All done, return chain
831          */
832         return (chain);
833 }
834
835 /*
836  * Create and return a new hammer2 system memory structure of the specified
837  * key, type and size and insert it under (parent).  (parent) is typically
838  * acquired as a side effect of issuing a prior lookup.  parent must be locked
839  * and held.
840  *
841  * Non-indirect types will automatically allocate indirect blocks as required
842  * if the new item does not fit in the current (parent).
843  *
844  * Indirect types will move a portion of the existing blockref array in
845  * (parent) into the new indirect type and then use one of the free slots
846  * to emplace the new indirect type.
847  *
848  * A new locked, referenced chain element is returned of the specified type.
849  * This element will also be marked as modified and contain a data area
850  * ready for initialization.
851  */
852 hammer2_chain_t *
853 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
854                      hammer2_key_t key, int keybits, int type, size_t bytes)
855 {
856         hammer2_blockref_t dummy;
857         hammer2_blockref_t *base;
858         hammer2_blockref_t *bref;
859         hammer2_chain_t *chain;
860         hammer2_chain_t dummy_chain;
861         int count;
862         int i;
863         int unlock_parent = 0;
864
865         /*
866          * First allocate media space and construct the dummy bref, then
867          * allocate the in-memory chain structure.
868          */
869         bzero(&dummy, sizeof(dummy));
870         dummy.type = type;
871         dummy.key = key;
872         dummy.keybits = keybits;
873         dummy.data_off = (hammer2_off_t)hammer2_freemap_bytes_to_radix(bytes);
874         chain = hammer2_chain_alloc(hmp, &dummy);
875
876         /*
877          * Recalculate bytes to reflect the actual media block allocation,
878          * then allocate the local memory copy.  This is a new structure
879          * so no I/O is performed.
880          */
881         bytes = (hammer2_off_t)1 <<
882                 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
883
884         switch(type) {
885         case HAMMER2_BREF_TYPE_VOLUME:
886                 panic("hammer2_chain_create: called with volume type");
887                 break;
888         case HAMMER2_BREF_TYPE_INODE:
889                 KKASSERT(bytes == HAMMER2_INODE_BYTES);
890                 chain->data = (void *)&chain->u.ip->ip_data;
891                 break;
892         default:
893                 /* leave chain->data NULL */
894                 KKASSERT(chain->data == NULL);
895                 break;
896         }
897         atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
898
899 again:
900         /*
901          * Locate a free blockref in the parent's array
902          */
903         switch(parent->bref.type) {
904         case HAMMER2_BREF_TYPE_INODE:
905                 KKASSERT(parent->data != NULL);
906                 base = &parent->data->ipdata.u.blockset.blockref[0];
907                 count = HAMMER2_SET_COUNT;
908                 break;
909         case HAMMER2_BREF_TYPE_INDIRECT:
910                 KKASSERT(parent->data != NULL);
911                 base = &parent->data->npdata.blockref[0];
912                 count = HAMMER2_IND_COUNT;
913                 break;
914         case HAMMER2_BREF_TYPE_VOLUME:
915                 KKASSERT(parent->data != NULL);
916                 base = &hmp->voldata.sroot_blockset.blockref[0];
917                 count = HAMMER2_SET_COUNT;
918                 break;
919         default:
920                 panic("hammer2_chain_push: unrecognized blockref type: %d",
921                       parent->bref.type);
922                 count = 0;
923                 break;
924         }
925
926         /*
927          * Scan for an unallocated bref, also skipping any slots occupied
928          * by in-memory chain elements that may not yet have been updated
929          * in the parent's bref array.
930          */
931         bzero(&dummy_chain, sizeof(dummy_chain));
932         bref = NULL;
933         for (i = 0; i < count; ++i) {
934                 bref = &base[i];
935                 dummy_chain.index = i;
936                 if (bref->type == 0 &&
937                     SPLAY_FIND(hammer2_chain_splay,
938                                &parent->shead, &dummy_chain) == NULL) {
939                         break;
940                 }
941         }
942
943         /*
944          * If no free blockref count be found we must create an indirect
945          * block and move a number of blockrefs into it.  With the parent
946          * locked we can safely lock each child in order to move it without
947          * causing a deadlock.
948          *
949          * This may return the new indirect block or the old parent depending
950          * on where the key falls.
951          */
952         if (i == count) {
953                 hammer2_chain_t *nparent;
954
955                 nparent = hammer2_chain_create_indirect(hmp, parent,
956                                                         key, keybits);
957                 if (nparent == NULL) {
958                         hammer2_chain_free(hmp, chain);
959                         chain = NULL;
960                         goto done;
961                 }
962                 if (parent != nparent) {
963                         if (unlock_parent)
964                                 hammer2_chain_put(hmp, parent);
965                         parent = nparent;
966                         unlock_parent = 1;
967                 }
968                 goto again;
969         }
970
971         /*
972          * Link the chain into its parent.
973          */
974         chain->parent = parent;
975         chain->index = i;
976         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
977                 panic("hammer2_chain_link: collision");
978         KKASSERT(parent->refs > 1);
979         atomic_add_int(&parent->refs, 1);
980
981         /*
982          * Additional linkage for inodes.  Reuse the parent pointer to
983          * find the parent directory.
984          */
985         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
986                 hammer2_chain_t *scan = parent;
987                 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
988                         scan = scan->parent;
989                 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
990                         chain->u.ip->pip = scan->u.ip;
991         }
992
993         /*
994          * Mark the newly created chain element as modified and fully
995          * resolve the chain->data pointer.
996          *
997          * Chain elements with embedded data will not issue I/O at this time.
998          * A new block will be allocated for the buffer but not instantiated.
999          *
1000          * Chain elements which do not use embedded data will allocate
1001          * the new block AND instantiate its buffer cache buffer, pointing
1002          * the data at the bp.
1003          */
1004         hammer2_chain_modify(hmp, chain);
1005
1006 done:
1007         if (unlock_parent)
1008                 hammer2_chain_put(hmp, parent);
1009         return (chain);
1010 }
1011
1012 /*
1013  * Create an indirect block that covers one or more of the elements in the
1014  * current parent.  Either returns the existing parent with no locking or
1015  * ref changes or returns the new indirect block locked and referenced,
1016  * depending on what the specified key falls into.
1017  *
1018  * The key/keybits for the indirect mode only needs to follow three rules:
1019  *
1020  * (1) That all elements underneath it fit within its key space and
1021  *
1022  * (2) That all elements outside it are outside its key space.
1023  *
1024  * (3) When creating the new indirect block any elements in the current
1025  *     parent that fit within the new indirect block's keyspace must be
1026  *     moved into the new indirect block.
1027  *
1028  * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1029  *     keyspace the the current parent, but lookup/iteration rules will
1030  *     ensure (and must ensure) that rule (2) for all parents leading up
1031  *     to the nearest inode or the root volume header is adhered to.  This
1032  *     is accomplished by always recursing through matching keyspaces in
1033  *     the hammer2_chain_lookup() and hammer2_chain_next() API.
1034  *
1035  * The current implementation calculates the current worst-case keyspace by
1036  * iterating the current parent and then divides it into two halves, choosing
1037  * whichever half has the most elements (not necessarily the half containing
1038  * the requested key).
1039  *
1040  * We can also opt to use the half with the least number of elements.  This
1041  * causes lower-numbered keys (aka logical file offsets) to recurse through
1042  * fewer indirect blocks and higher-numbered keys to recurse through more.
1043  * This also has the risk of not moving enough elements to the new indirect
1044  * block and being forced to create several indirect blocks before the element
1045  * can be inserted.
1046  */
1047 static
1048 hammer2_chain_t *
1049 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1050                               hammer2_key_t create_key, int create_bits)
1051 {
1052         hammer2_blockref_t *base;
1053         hammer2_blockref_t *bref;
1054         hammer2_chain_t *chain;
1055         hammer2_chain_t *ichain;
1056         hammer2_chain_t dummy;
1057         hammer2_key_t key = create_key;
1058         int keybits = create_bits;
1059         int locount = 0;
1060         int hicount = 0;
1061         int count;
1062         int i;
1063
1064         /*
1065          * Mark the parent modified so our base[] pointer remains valid
1066          * while we move entries.
1067          */
1068         hammer2_chain_modify(hmp, parent);
1069
1070         /*
1071          * Locate a free blockref in the parent's array
1072          */
1073         switch(parent->bref.type) {
1074         case HAMMER2_BREF_TYPE_INODE:
1075                 base = &parent->data->ipdata.u.blockset.blockref[0];
1076                 count = HAMMER2_SET_COUNT;
1077                 break;
1078         case HAMMER2_BREF_TYPE_INDIRECT:
1079                 base = &parent->data->npdata.blockref[0];
1080                 count = HAMMER2_IND_COUNT;
1081                 break;
1082         case HAMMER2_BREF_TYPE_VOLUME:
1083                 base = &hmp->voldata.sroot_blockset.blockref[0];
1084                 count = HAMMER2_SET_COUNT;
1085                 break;
1086         default:
1087                 panic("hammer2_chain_push: unrecognized blockref type: %d",
1088                       parent->bref.type);
1089                 count = 0;
1090                 break;
1091         }
1092
1093         /*
1094          * Scan for an unallocated bref, also skipping any slots occupied
1095          * by in-memory chain elements that may not yet have been updated
1096          * in the parent's bref array.
1097          */
1098         bzero(&dummy, sizeof(dummy));
1099         for (i = 0; i < count; ++i) {
1100                 int nkeybits;
1101
1102                 bref = &base[i];
1103                 if (bref->type == 0) {
1104                         dummy.index = i;
1105                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1106                                            &dummy);
1107                         if (chain == NULL)
1108                                 continue;
1109                         bref = &chain->bref;
1110                 }
1111
1112                 /*
1113                  * Expand are calculated key range (key, keybits) to fit
1114                  * the scanned key.
1115                  */
1116                 nkeybits = keybits;
1117                 if (nkeybits < bref->keybits)
1118                         nkeybits = bref->keybits;
1119                 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1120                         (key ^ bref->key)) != 0) {
1121                         ++nkeybits;
1122                 }
1123
1124                 /*
1125                  * If the new key range is larger we have to determine
1126                  * which side of the new key range the existing keys fall
1127                  * under by checking the high bit, then collapsing the
1128                  * locount into the hicount or vise-versa.
1129                  */
1130                 if (keybits != nkeybits) {
1131                         if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1132                                 hicount += locount;
1133                                 locount = 0;
1134                         } else {
1135                                 locount += hicount;
1136                                 hicount = 0;
1137                         }
1138                         keybits = nkeybits;
1139                 }
1140
1141                 /*
1142                  * The newly scanned key will be in the lower half or the
1143                  * higher half of the (new) key range.
1144                  */
1145                 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1146                         ++hicount;
1147                 else
1148                         ++locount;
1149         }
1150
1151         /*
1152          * The key for the indirect block will be the lower half or
1153          * the upper half of the above calculated keyspace.
1154          */
1155         key &= ~(((hammer2_key_t)1 << keybits) - 1);
1156         if (hammer2_indirect_optimize) {
1157                 /*
1158                  * Insert node for least number of keys, best for linear
1159                  * files (?)  XXX won't work if least number is 0.
1160                  */
1161                 panic("hammer2_indirect_optimize not working yet");
1162                 if (hicount < locount)
1163                         key |= (hammer2_key_t)1 << (keybits - 1);
1164         } else {
1165                 /*
1166                  * Insert node for most number of keys, best for heavily
1167                  * fragmented files.
1168                  */
1169                 if (hicount > locount)
1170                         key |= (hammer2_key_t)1 << (keybits - 1);
1171         }
1172
1173         /*
1174          * Ok, create our new indirect block
1175          */
1176         dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1177         dummy.bref.key = key;
1178         dummy.bref.keybits = keybits - 1;
1179         dummy.bref.data_off = (hammer2_off_t)
1180                             hammer2_freemap_bytes_to_radix(HAMMER2_PBUFSIZE);
1181         dummy.index = -1;       /* not yet assigned */
1182         ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1183         kprintf("create_indirect2: allocate %016jx/%d\n", key, keybits);
1184
1185         /*
1186          * Iterate the original parent and move the matching brefs into
1187          * the new indirect block.  All the keys are inclusive of keybits
1188          * so we only have to check bit (keybits - 1).
1189          */
1190         for (i = 0; i < count; ++i) {
1191                 bref = &base[i];
1192                 if (bref->type == 0) {
1193                         dummy.index = i;
1194                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1195                                            &dummy);
1196                         if (chain == NULL) {
1197                                 /*
1198                                  * Select index indirect block is placed in
1199                                  */
1200                                 if (ichain->index < 0)
1201                                         ichain->index = i;
1202                                 continue;
1203                         }
1204                         bref = &chain->bref;
1205                 }
1206
1207                 /*
1208                  * Skip keys not in the chosen half (low or high), only bit
1209                  * (keybits - 1) needs to be compared but for safety we
1210                  * will compare all msb bits plus that bit again.
1211                  */
1212                 if ((~(((hammer2_key_t)1 << (keybits - 1)) - 1) &
1213                     (key ^ bref->key)) != 0) {
1214                         continue;
1215                 }
1216
1217                 /*
1218                  * This element is being moved, its slot is available
1219                  * for our indirect block.
1220                  */
1221                 if (ichain->index < 0)
1222                         ichain->index = i;
1223                 bzero(&base[i], sizeof(base[i]));
1224
1225                 /*
1226                  * Load the new indirect block by acquiring or allocating
1227                  * the related chain entries, then simply move it to the
1228                  * new parent (ichain).
1229                  *
1230                  * Flagging the new chain entry MOVED will cause a flush
1231                  * to synchronize its block into the new indirect block.
1232                  * The chain is unlocked after being moved but needs to
1233                  * retain a reference for the MOVED state
1234                  *
1235                  * We must still set SUBMODIFIED in the parent but we do
1236                  * that after the loop.
1237                  *
1238                  * XXX we really need a lock here but we don't need the
1239                  *     data.  NODATA feature needed.
1240                  */
1241                 chain = hammer2_chain_get(hmp, parent, i,
1242                                           HAMMER2_LOOKUP_NOLOCK);
1243                 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1244                 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1245                         panic("hammer2_chain_create_indirect: collision");
1246                 chain->parent = ichain;
1247                 atomic_add_int(&parent->refs, -1);
1248                 atomic_add_int(&ichain->refs, 1);
1249                 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1250                         hammer2_chain_drop(hmp, chain);
1251                 } else {
1252                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1253                 }
1254                 KKASSERT(parent->refs > 0);
1255                 chain = NULL;
1256         }
1257
1258         /*
1259          * Insert the new indirect block into the parent now that we've
1260          * cleared out some entries in the parent.  We calculated a good
1261          * insertion index in the loop above (ichain->index).
1262          */
1263         KKASSERT(ichain->index >= 0);
1264         kprintf("insert ichain at %d\n", ichain->index);
1265         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1266                 panic("hammer2_chain_create_indirect: ichain insertion");
1267         ichain->parent = parent;
1268         atomic_add_int(&parent->refs, 1);
1269
1270         /*
1271          * Mark the new indirect block modified after insertion, which
1272          * will propagate up through parent all the way to the root and
1273          * also allocate the physical block in ichain for our caller.
1274          *
1275          * We have to set SUBMODIFIED in ichain's flags manually so the
1276          * flusher knows it has to recurse through it to get to all of
1277          * our moved blocks.
1278          */
1279         hammer2_chain_modify(hmp, ichain);
1280         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1281
1282         /*
1283          * Figure out what to return.
1284          */
1285         if (create_bits >= keybits) {
1286                 /*
1287                  * Key being created is way outside the key range,
1288                  * return the original parent.
1289                  */
1290                 hammer2_chain_put(hmp, ichain);
1291         } else if (~(((hammer2_key_t)1 << (keybits - 1)) - 1) &
1292                    (create_key ^ key)) {
1293                 /*
1294                  * Key being created is outside the key range,
1295                  * return the original parent.
1296                  */
1297                 hammer2_chain_put(hmp, ichain);
1298         } else {
1299                 /*
1300                  * Otherwise its in the range, return the new parent.
1301                  */
1302                 parent = ichain;
1303         }
1304
1305         kprintf("create_indirect9\n");
1306
1307         return(parent);
1308 }
1309
1310 /*
1311  * Physically delete the specified chain element.  Note that inodes with
1312  * open descriptors should not be deleted (as with other filesystems) until
1313  * the last open descriptor is closed.
1314  *
1315  * This routine will remove the chain element from its parent and potentially
1316  * also recurse upward and delete indirect blocks which become empty as a
1317  * side effect.
1318  *
1319  * The caller must pass a pointer to the chain's parent, also locked and
1320  * referenced.  (*parentp) will be modified in a manner similar to a lookup
1321  * or iteration when indirect blocks are also deleted as a side effect.
1322  */
1323 void
1324 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
1325                      hammer2_chain_t *chain)
1326 {
1327 }
1328
1329 /*
1330  * Recursively flush the specified chain.  The chain is locked and
1331  * referenced by the caller and will remain so on return.
1332  *
1333  * This cannot be called with the volume header's vchain
1334  */
1335 void
1336 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
1337                     hammer2_blockref_t *parent_bref)
1338 {
1339         /*
1340          * Flush any children of this chain entry.
1341          */
1342         if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1343                 hammer2_blockref_t *base;
1344                 hammer2_blockref_t bref;
1345                 hammer2_chain_t *scan;
1346                 hammer2_chain_t *next;
1347                 int count;
1348                 int submodified = 0;
1349
1350                 /*
1351                  * Modifications to the children will propagate up, forcing
1352                  * us to become modified and copy-on-write too.
1353                  */
1354                 hammer2_chain_modify(hmp, chain);
1355
1356                 /*
1357                  * The blockref in the parent's array must be repointed at
1358                  * the new block allocated by the child after its flush.
1359                  *
1360                  * Calculate the base of the array.
1361                  */
1362                 switch(chain->bref.type) {
1363                 case HAMMER2_BREF_TYPE_INODE:
1364                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1365                         base = &chain->data->ipdata.u.blockset.blockref[0];
1366                         count = HAMMER2_SET_COUNT;
1367                         break;
1368                 case HAMMER2_BREF_TYPE_INDIRECT:
1369                         base = &chain->data->npdata.blockref[0];
1370                         count = HAMMER2_IND_COUNT;
1371                         break;
1372                 case HAMMER2_BREF_TYPE_VOLUME:
1373                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1374                         base = &hmp->voldata.sroot_blockset.blockref[0];
1375                         count = HAMMER2_SET_COUNT;
1376                         break;
1377                 default:
1378                         base = NULL;
1379                         panic("hammer2_chain_get: unrecognized blockref type: %d",
1380                               chain->bref.type);
1381                 }
1382
1383                 /*
1384                  * Flush the children and update the blockrefs in the parent.
1385                  * Be careful of ripouts during the loop.
1386                  */
1387                 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1388                 while ((scan = next) != NULL) {
1389                         next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1390                                           scan);
1391                         if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1392                                            HAMMER2_CHAIN_MODIFIED |
1393                                            HAMMER2_CHAIN_MOVED)) {
1394                                 hammer2_chain_ref(hmp, scan);
1395                                 hammer2_chain_lock(hmp, scan);
1396                                 bref = base[scan->index];
1397                                 hammer2_chain_flush(hmp, scan, &bref);
1398                                 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1399                                                    HAMMER2_CHAIN_MODIFIED)) {
1400                                         submodified = 1;
1401                                         kprintf("flush race, sub dirty\n");
1402                                 } else {
1403                                         KKASSERT(scan->index < count);
1404                                         base[scan->index] = bref;
1405                                         if (scan->flags & HAMMER2_CHAIN_MOVED) {
1406                                                 atomic_clear_int(&scan->flags,
1407                                                          HAMMER2_CHAIN_MOVED);
1408                                                 hammer2_chain_drop(hmp, scan);
1409                                         }
1410                                 }
1411                                 hammer2_chain_put(hmp, scan);
1412                         }
1413                 }
1414                 if (submodified == 0) {
1415                         atomic_clear_int(&chain->flags,
1416                                          HAMMER2_CHAIN_SUBMODIFIED);
1417                 }
1418         }
1419
1420         /*
1421          * Flush this chain entry only if it is marked modified.
1422          *
1423          * If the chain entry was moved we must still updated *parent_bref
1424          * or the indirect block won't be adjusted to point to us.
1425          */
1426         if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
1427                 if (chain->flags & HAMMER2_CHAIN_MOVED)
1428                         *parent_bref = chain->bref;
1429                 return;
1430         }
1431
1432         /*
1433          * If this is part of a recursive flush we can go ahead and write
1434          * out the buffer cache buffer and pass a new bref back up the chain.
1435          *
1436          * This will never be a volume header.
1437          */
1438         if (parent_bref) {
1439                 hammer2_blockref_t *bref;
1440                 hammer2_off_t off_hi;
1441                 struct buf *bp;
1442                 size_t off_lo;
1443                 size_t bytes;
1444                 int error;
1445
1446                 KKASSERT(chain->data != NULL);
1447                 bref = &chain->bref;
1448
1449                 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1450                 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1451                 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1452                 KKASSERT(off_hi != 0);  /* not the root volume header */
1453
1454                 if (chain->bp) {
1455                         /*
1456                          * The data is mapped directly to the bp and will be
1457                          * written out when the chain is unlocked by the
1458                          * parent.  However, since we are clearing the
1459                          * MODIFIED flag we have to set the FLUSHED flag
1460                          * so the hammer2_chain_unlock() code knows to
1461                          * bdwrite() the buffer.
1462                          */
1463                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSHED);
1464                 } else {
1465                         /*
1466                          * The data is embedded, we have to acquire the
1467                          * buffer cache buffer and copy the data into it.
1468                          */
1469                         bp = NULL;
1470                         error = bread(hmp->devvp, off_hi,
1471                                       HAMMER2_PBUFSIZE, &bp);
1472                         KKASSERT(error == 0); /* XXX */
1473
1474                         /*
1475                          * Copy the data to the buffer, mark the buffer
1476                          * dirty, and convert the chain to unmodified.
1477                          */
1478                         bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1479                         bdwrite(bp);
1480                         bp = NULL;
1481
1482                         chain->bref.check.iscsi32.value =
1483                                         hammer2_icrc32(chain->data, bytes);
1484                 }
1485
1486                 /*
1487                  * Return information on the new block to the parent.
1488                  */
1489                 *parent_bref = chain->bref;
1490                 hammer2_chain_drop(hmp, chain); /* drop ref tracking mod bit */
1491                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
1492         } else {
1493                 hammer2_blockref_t *bref;
1494
1495                 KKASSERT(chain->data != NULL);
1496                 KKASSERT(chain->bp == NULL);
1497                 bref = &chain->bref;
1498
1499                 switch(bref->type) {
1500                 case HAMMER2_BREF_TYPE_VOLUME:
1501                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1502                                 hammer2_icrc32(
1503                                         (char *)&hmp->voldata +
1504                                          HAMMER2_VOLUME_ICRC1_OFF,
1505                                         HAMMER2_VOLUME_ICRC1_SIZE);
1506                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1507                                 hammer2_icrc32(
1508                                         (char *)&hmp->voldata +
1509                                          HAMMER2_VOLUME_ICRC0_OFF,
1510                                         HAMMER2_VOLUME_ICRC0_SIZE);
1511                         hmp->voldata.icrc_volheader =
1512                                 hammer2_icrc32(
1513                                         (char *)&hmp->voldata +
1514                                          HAMMER2_VOLUME_ICRCVH_OFF,
1515                                         HAMMER2_VOLUME_ICRCVH_SIZE);
1516                         break;
1517                 }
1518         }
1519 }