Merge branches 'hammer2' and 'master' of ssh://crater.dragonflybsd.org/repository...
[dragonfly.git] / sys / vfs / hammer2 / hammer2_chain.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * This subsystem handles direct and indirect block searches, recursions,
37  * creation, and deletion.  Chains of blockrefs are tracked and modifications
38  * are flag for propagation... eventually all the way back to the volume
39  * header.
40  */
41
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/lock.h>
48 #include <sys/uuid.h>
49
50 #include "hammer2.h"
51
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
53
54 static int hammer2_indirect_optimize;   /* XXX SYSCTL */
55
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57                         hammer2_mount_t *hmp, hammer2_chain_t *parent,
58                         hammer2_key_t key, int keybits);
59
60 /*
61  * Compare function for chain splay tree
62  */
63 int
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
65 {
66         return(chain2->index - chain1->index);
67 }
68
69 /*
70  * Allocate a new disconnected chain element representing the specified
71  * bref.  The chain element is locked exclusively and refs is set to 1.
72  *
73  * This essentially allocates a system memory structure representing one
74  * of the media structure types, including inodes.
75  */
76 hammer2_chain_t *
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
78 {
79         hammer2_chain_t *chain;
80         hammer2_inode_t *ip;
81         hammer2_indblock_t *np;
82         hammer2_data_t *dp;
83
84         /*
85          * Construct the appropriate system structure.
86          */
87         switch(bref->type) {
88         case HAMMER2_BREF_TYPE_INODE:
89                 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
90                 chain = &ip->chain;
91                 chain->u.ip = ip;
92                 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
93                 ip->hmp = hmp;
94                 break;
95         case HAMMER2_BREF_TYPE_INDIRECT:
96                 np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO);
97                 chain = &np->chain;
98                 chain->u.np = np;
99                 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
100                 break;
101         case HAMMER2_BREF_TYPE_DATA:
102                 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
103                 chain = &dp->chain;
104                 chain->u.dp = dp;
105                 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
106                 break;
107         case HAMMER2_BREF_TYPE_VOLUME:
108                 chain = NULL;
109                 panic("hammer2_chain_get: volume type illegal for op");
110         default:
111                 chain = NULL;
112                 panic("hammer2_chain_get: unrecognized blockref type: %d",
113                       bref->type);
114         }
115         chain->bref = *bref;
116         chain->index = -1;      /* not yet assigned */
117
118         chain->refs = 1;
119         lockmgr(&chain->lk, LK_EXCLUSIVE);
120
121         return (chain);
122 }
123
124 /*
125  * Free a disconnected chain element
126  */
127 void
128 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
129 {
130         void *mem;
131
132         KKASSERT(chain->bp == NULL);
133         KKASSERT(chain->data == NULL);
134         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
135                  chain->u.ip->vp == NULL);
136
137         if ((mem = chain->u.mem) != NULL) {
138                 chain->u.mem = NULL;
139                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
140                         kfree(mem, hmp->minode);
141                 else
142                         kfree(mem, hmp->mchain);
143         }
144 }
145
146 /*
147  * Add a reference to a chain element (for shared access).  The chain
148  * element must already have at least 1 ref controlled by the caller.
149  */
150 void
151 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
152 {
153         KKASSERT(chain->refs > 0);
154         atomic_add_int(&chain->refs, 1);
155 }
156
157 /*
158  * Drop the callers reference to the chain element.  If the ref count
159  * reaches zero the chain element and its related structure (typically an
160  * inode or indirect block) will be freed and the parent will be
161  * recursively dropped.
162  *
163  * Modified elements hold an additional reference so it should not be
164  * possible for the count on a modified element to drop to 0.
165  *
166  * The chain element must NOT be locked by the caller.
167  *
168  * The parent might or might not be locked by the caller but if so it
169  * will also be referenced so we shouldn't recurse upward.
170  */
171 void
172 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
173 {
174         hammer2_chain_t *parent;
175         u_int refs;
176
177         while (chain) {
178                 refs = chain->refs;
179                 cpu_ccfence();
180                 if (refs == 1) {
181                         KKASSERT(chain != &hmp->vchain);
182                         parent = chain->parent;
183                         lockmgr(&parent->lk, LK_EXCLUSIVE);
184                         if (atomic_cmpset_int(&chain->refs, 1, 0)) {
185                                 /*
186                                  * Succeeded, recurse and drop parent
187                                  */
188                                 SPLAY_REMOVE(hammer2_chain_splay,
189                                              &parent->shead, chain);
190                                 chain->parent = NULL;
191                                 lockmgr(&parent->lk, LK_RELEASE);
192                                 hammer2_chain_free(hmp, chain);
193                                 chain = parent;
194                         } else {
195                                 lockmgr(&parent->lk, LK_RELEASE);
196                         }
197                 } else {
198                         if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
199                                 /*
200                                  * Succeeded, count did not reach zero so
201                                  * cut out of the loop.
202                                  */
203                                 break;
204                         }
205                 }
206         }
207 }
208
209 /*
210  * Lock a chain element, acquiring its data with I/O if necessary.
211  *
212  * Returns 0 on success or an error code if the data could not be acquired.
213  * The chain element is locked either way.
214  *
215  * chain->data will be pointed either at the embedded data (e.g. for
216  * inodes), in which case the buffer cache buffer is released, or will
217  * point into the bp->b_data buffer with the bp left intact while locked.
218  */
219 int
220 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
221 {
222         hammer2_blockref_t *bref;
223         hammer2_off_t off_hi;
224         size_t off_lo;
225         int error;
226         void *data;
227
228         /*
229          * Lock the element.  Under certain conditions this might end up
230          * being a recursive lock.
231          */
232         KKASSERT(chain->refs > 0);
233         lockmgr(&chain->lk, LK_EXCLUSIVE);
234
235         /*
236          * The volume header is a special case
237          */
238         if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
239                 return(0);
240
241         /*
242          * bp must be NULL, so if the data pointer is valid here it points
243          * to embedded data and no I/O is necessary (whether modified or not).
244          */
245         KKASSERT(chain->bp == NULL);
246         if (chain->data)
247                 return (0);
248
249         /*
250          * If data is NULL we must issue I/O.  Any error returns the error
251          * code but leaves the chain locked.
252          *
253          * If the chain was modified a new bref will have already been
254          * allocated and its related bp is probably still sitting in the
255          * buffer cache.
256          */
257         bref = &chain->bref;
258
259         off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
260         off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
261         KKASSERT(off_hi != 0);
262         error = bread(hmp->devvp, off_hi, HAMMER2_PBUFSIZE, &chain->bp);
263
264         if (error) {
265                 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
266                         (intmax_t)off_hi, error);
267                 brelse(chain->bp);
268                 chain->bp = NULL;
269                 return (error);
270         }
271
272         /*
273          * Setup the data pointer, either pointing it to an embedded data
274          * structure and copying the data from the buffer, or pointint it
275          * into the buffer.
276          *
277          * The buffer is not retained when copying to an embedded data
278          * structure in order to avoid potential deadlocks or recursions
279          * on the same physical buffer.
280          */
281         switch (bref->type) {
282         case HAMMER2_BREF_TYPE_VOLUME:
283                 /*
284                  * Copy data from bp to embedded buffer
285                  */
286                 KKASSERT(0);    /* not yet - have mount use this soon */
287                 KKASSERT(off_hi == 0);
288                 bcopy((char *)chain->bp->b_data + off_lo,
289                       &hmp->voldata, HAMMER2_PBUFSIZE);
290                 chain->data = (void *)&hmp->voldata;
291                 brelse(chain->bp);
292                 chain->bp = NULL;
293                 break;
294         case HAMMER2_BREF_TYPE_INODE:
295                 /*
296                  * Copy data from bp to embedded buffer.
297                  */
298                 bcopy((char *)chain->bp->b_data + off_lo,
299                       &chain->u.ip->ip_data,
300                       HAMMER2_INODE_BYTES);
301                 chain->data = (void *)&chain->u.ip->ip_data;
302                 brelse(chain->bp);
303                 chain->bp = NULL;
304                 break;
305         default:
306                 /*
307                  * Leave bp intact
308                  */
309                 data = (char *)chain->bp->b_data + off_lo;
310                 chain->data = data;
311                 break;
312         }
313         return (0);
314 }
315
316 /*
317  * Convert a locked chain that was retrieved read-only to read-write.
318  *
319  * If not already marked modified a new physical block will be allocated
320  * and assigned to the bref.  If the data is pointing into an existing
321  * bp it will be copied to the new bp and the new bp will replace the
322  * existing bp.
323  *
324  * If the data is embedded we allocate the new physical block but don't
325  * bother copying the data into it (yet).
326  */
327 void
328 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
329 {
330         hammer2_chain_t *parent;
331         struct buf *nbp;
332         size_t bytes;
333         void *ndata;
334         int error;
335
336         /*
337          * If the chain is already marked modified we can just return.
338          */
339         if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
340                 KKASSERT(chain->data != NULL);
341                 return;
342         }
343
344         /*
345          * The MODIFIED bit is not yet set, we must allocate the
346          * copy-on-write block.
347          *
348          * If the data is embedded no other action is required.
349          *
350          * If the data is not embedded we acquire and clear the
351          * new block.  If chain->data is not NULL we then do the
352          * copy-on-write.  chain->data will then be repointed to the new
353          * buffer and the old buffer will be released.
354          *
355          * For newly created elements with no prior allocation we go
356          * through the copy-on-write steps except without the copying part.
357          */
358         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
359         hammer2_chain_ref(hmp, chain);  /* ref for modified bit */
360
361         bytes = 1 << (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
362         if (chain != &hmp->vchain) {
363                 chain->bref.data_off = hammer2_freemap_alloc(hmp, bytes);
364                 /* XXX failed allocation */
365         }
366
367         switch(chain->bref.type) {
368         case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
369         case HAMMER2_BREF_TYPE_INODE:           /* embedded */
370                 /*
371                  * data points to embedded structure, no copy needed
372                  */
373                 error = 0;
374                 break;
375         case HAMMER2_BREF_TYPE_INDIRECT:
376         case HAMMER2_BREF_TYPE_DATA:
377                 /*
378                  * data (if not NULL) points into original bp, copy-on-write
379                  * to new block.
380                  */
381                 KKASSERT(chain != &hmp->vchain);        /* safety */
382                 if (bytes == HAMMER2_PBUFSIZE) {
383                         nbp = getblk(hmp->devvp,
384                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
385                                      HAMMER2_PBUFSIZE, 0, 0);
386                         vfs_bio_clrbuf(nbp);
387                         error = 0;
388                 } else {
389                         error = bread(hmp->devvp,
390                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
391                                      HAMMER2_PBUFSIZE, &nbp);
392                         KKASSERT(error == 0);/* XXX handle error */
393                 }
394                 ndata = nbp->b_data + (chain->bref.data_off &
395                                        HAMMER2_OFF_MASK_LO);
396                 if (chain->data) {
397                         bcopy(chain->data, ndata, bytes);
398                         KKASSERT(chain->bp != NULL);
399                         brelse(chain->bp);
400                 }
401                 chain->bp = nbp;
402                 chain->data = ndata;
403                 break;
404         default:
405                 panic("hammer2_chain_modify: unknown bref type");
406                 break;
407
408         }
409
410         /*
411          * Recursively mark the parent chain elements so flushes can find
412          * modified elements.
413          */
414         parent = chain->parent;
415         while (parent && (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
416                 atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
417                 parent = parent->parent;
418         }
419 }
420
421 /*
422  * Unlock a chain element without dropping its reference count.
423  * (see hammer2_chain_put() to do both).
424  *
425  * Non-embedded data references (chain->bp != NULL) are returned to the
426  * system and the data field is cleared in that case.  If modified the
427  * dirty buffer is still returned to the system, can be flushed to disk by
428  * the system at any time, and will be reconstituted/re-read as needed.
429  */
430 void
431 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
432 {
433         if (chain->bp) {
434                 chain->data = NULL;
435                 if (chain->flags & (HAMMER2_CHAIN_MODIFIED |
436                                     HAMMER2_CHAIN_FLUSHED))
437                         bdwrite(chain->bp);
438                 else
439                         bqrelse(chain->bp);
440                 chain->bp = NULL;
441         }
442         lockmgr(&chain->lk, LK_RELEASE);
443 }
444
445 /*
446  * Locate an in-memory chain.  The parent must be locked.  The in-memory
447  * chain is returned or NULL if no in-memory chain is present.
448  *
449  * NOTE: A chain on-media might exist for this index when NULL is returned.
450  */
451 hammer2_chain_t *
452 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
453 {
454         hammer2_chain_t dummy;
455         hammer2_chain_t *chain;
456
457         dummy.index = index;
458         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
459         return (chain);
460 }
461
462 /*
463  * Return a locked chain structure with all associated data acquired.
464  *
465  * Caller must lock the parent on call, the returned child will be locked.
466  */
467 hammer2_chain_t *
468 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
469                   int index, int flags)
470 {
471         hammer2_blockref_t *bref;
472         hammer2_chain_t *chain;
473         hammer2_chain_t dummy;
474
475         /*
476          * First see if we have a (possibly modified) chain element cached
477          * for this (parent, index).  Acquire the data if necessary.
478          *
479          * If chain->data is non-NULL the chain should already be marked
480          * modified.
481          */
482         dummy.index = index;
483         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
484         if (chain) {
485                 hammer2_chain_ref(hmp, chain);
486                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
487                         hammer2_chain_lock(hmp, chain);
488                 return(chain);
489         }
490
491         /*
492          * Otherwise lookup the bref and issue I/O (switch on the parent)
493          */
494         switch(parent->bref.type) {
495         case HAMMER2_BREF_TYPE_INODE:
496                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
497                 bref = &parent->data->ipdata.u.blockset.blockref[index];
498                 break;
499         case HAMMER2_BREF_TYPE_INDIRECT:
500                 KKASSERT(index >= 0 && index < HAMMER2_IND_COUNT);
501                 bref = &parent->data->npdata.blockref[index];
502                 break;
503         case HAMMER2_BREF_TYPE_VOLUME:
504                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
505                 bref = &hmp->voldata.sroot_blockset.blockref[index];
506                 break;
507         default:
508                 bref = NULL;
509                 panic("hammer2_chain_get: unrecognized blockref type: %d",
510                       parent->bref.type);
511         }
512         chain = hammer2_chain_alloc(hmp, bref);
513
514         /*
515          * Link the chain into its parent.  Caller is expected to hold an
516          * exclusive lock on the parent.
517          */
518         chain->parent = parent;
519         chain->index = index;
520         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
521                 panic("hammer2_chain_link: collision");
522         KKASSERT(parent->refs > 1);
523         atomic_add_int(&parent->refs, 1);
524
525         /*
526          * Additional linkage for inodes.  Reuse the parent pointer to
527          * find the parent directory.
528          */
529         if (bref->type == HAMMER2_BREF_TYPE_INODE) {
530                 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
531                         parent = parent->parent;
532                 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
533                         chain->u.ip->pip = parent->u.ip;
534         }
535
536         /*
537          * Our new chain structure has already been referenced and locked
538          * but the lock code handles the I/O so call it to resolve the data.
539          * Then release one of our two exclusive locks.
540          *
541          * If NOLOCK is set the release will release the one-and-only lock.
542          */
543         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
544                 hammer2_chain_lock(hmp, chain);
545         lockmgr(&chain->lk, LK_RELEASE);
546         /* still locked */
547
548         return (chain);
549 }
550
551 /*
552  * Unlock and dereference a chain after use.  It is possible for this to
553  * recurse up the chain.
554  */
555 void
556 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
557 {
558         hammer2_chain_unlock(hmp, chain);
559         hammer2_chain_drop(hmp, chain);
560 }
561
562 /*
563  * Locate any key between key_beg and key_end inclusive.  (*parentp)
564  * typically points to an inode but can also point to a related indirect
565  * block and this function will recurse upwards and find the inode again.
566  *
567  * (*parentp) must be exclusively locked and referenced and can be an inode
568  * or an existing indirect block within the inode.
569  *
570  * On return (*parentp) will be modified to point at the deepest parent chain
571  * element encountered during the search, as a helper for an insertion or
572  * deletion.   The new (*parentp) will be locked and referenced and the old
573  * will be unlocked and dereferenced (no change if they are both the same).
574  *
575  * The matching chain will be returned exclusively locked and referenced.
576  *
577  * NULL is returned if no match was found, but (*parentp) will still
578  * potentially be adjusted.
579  *
580  * This function will also recurse up the chain if the key is not within the
581  * current parent's range.  (*parentp) can never be set to NULL.  An iteration
582  * can simply allow (*parentp) to float inside the loop.
583  */
584 hammer2_chain_t *
585 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
586                      hammer2_key_t key_beg, hammer2_key_t key_end,
587                      int flags)
588 {
589         hammer2_chain_t *parent;
590         hammer2_chain_t *chain;
591         hammer2_chain_t *tmp;
592         hammer2_blockref_t *base;
593         hammer2_blockref_t *bref;
594         hammer2_key_t scan_beg;
595         hammer2_key_t scan_end;
596         int count = 0;
597         int i;
598
599         /*
600          * Recurse (*parentp) upward if necessary until the parent completely
601          * encloses the key range or we hit the inode.
602          */
603         parent = *parentp;
604         while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
605                 scan_beg = parent->bref.key;
606                 scan_end = scan_beg +
607                            ((hammer2_key_t)1 << parent->bref.keybits) - 1;
608                 if (key_beg >= scan_beg && key_end <= scan_end)
609                         break;
610                 hammer2_chain_unlock(hmp, parent);
611                 parent = parent->parent;
612                 hammer2_chain_ref(hmp, parent);         /* ref new parent */
613                 hammer2_chain_lock(hmp, parent);        /* lock new parent */
614                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
615                 *parentp = parent;                      /* new parent */
616         }
617
618 again:
619         /*
620          * Locate the blockref array.  Currently we do a fully associative
621          * search through the array.
622          */
623         switch(parent->bref.type) {
624         case HAMMER2_BREF_TYPE_INODE:
625                 base = &parent->data->ipdata.u.blockset.blockref[0];
626                 count = HAMMER2_SET_COUNT;
627                 break;
628         case HAMMER2_BREF_TYPE_INDIRECT:
629                 base = &parent->data->npdata.blockref[0];
630                 count = HAMMER2_IND_COUNT;
631                 break;
632         case HAMMER2_BREF_TYPE_VOLUME:
633                 base = &hmp->voldata.sroot_blockset.blockref[0];
634                 count = HAMMER2_SET_COUNT;
635                 break;
636         default:
637                 panic("hammer2_chain_push: unrecognized blockref type: %d",
638                       parent->bref.type);
639                 base = NULL;    /* safety */
640                 count = 0;      /* safety */
641         }
642
643         /*
644          * If the element and key overlap we use the element.
645          */
646         bref = NULL;
647         for (i = 0; i < count; ++i) {
648                 tmp = hammer2_chain_find(hmp, parent, i);
649                 bref = (tmp) ? &tmp->bref : &base[i];
650                 if (bref->type == 0)
651                         continue;
652                 kprintf("lookup(%016jx,%d) %d: %016jx/%d\n",
653                         parent->bref.data_off, i,
654                         bref->type,bref->key, bref->keybits);
655                 scan_beg = bref->key;
656                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
657                 if (key_beg <= scan_end && key_end >= scan_beg)
658                         break;
659         }
660         kprintf("lookup scan %d\n", i);
661         if (i == count) {
662                 if (key_beg == key_end)
663                         return (NULL);
664                 return (hammer2_chain_next(hmp, parentp, NULL,
665                                            key_beg, key_end, flags));
666         }
667
668         /*
669          * Acquire the new chain element.  If the chain element is an
670          * indirect block we must search recursively.
671          */
672         chain = hammer2_chain_get(hmp, parent, i, flags);
673         if (chain == NULL)
674                 return (NULL);
675
676         /*
677          * If the chain element is an indirect block it becomes the new
678          * parent and we loop on it.
679          */
680         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
681                 hammer2_chain_put(hmp, parent);
682                 *parentp = parent = chain;
683                 goto again;
684         }
685
686         /*
687          * All done, return chain
688          */
689         return (chain);
690 }
691
692 /*
693  * After having issued a lookup we can iterate all matching keys.
694  *
695  * If chain is non-NULL we continue the iteration from just after it's index.
696  *
697  * If chain is NULL we assume the parent was exhausted and continue the
698  * iteration at the next parent.
699  */
700 hammer2_chain_t *
701 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
702                    hammer2_chain_t *chain,
703                    hammer2_key_t key_beg, hammer2_key_t key_end,
704                    int flags)
705 {
706         hammer2_chain_t *parent;
707         hammer2_chain_t *tmp;
708         hammer2_blockref_t *base;
709         hammer2_blockref_t *bref;
710         hammer2_key_t scan_beg;
711         hammer2_key_t scan_end;
712         int i;
713         int count;
714
715         parent = *parentp;
716
717 again:
718         /*
719          * Calculate the next index and recalculate the parent if necessary.
720          */
721         if (chain) {
722                 /*
723                  * Continue iteration within current parent
724                  */
725                 i = chain->index + 1;
726                 hammer2_chain_put(hmp, chain);
727                 chain = NULL;
728         } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
729                 /*
730                  * We reached the end of the iteration.
731                  */
732                 return (NULL);
733         } else {
734                 /*
735                  * Continue iteration with next parent
736                  */
737                 hammer2_chain_t *nparent;
738
739                 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
740                         return (NULL);
741                 i = parent->index + 1;
742                 nparent = parent->parent;
743                 hammer2_chain_ref(hmp, nparent);        /* ref new parent */
744                 hammer2_chain_unlock(hmp, parent);
745                 parent = nparent;
746                 hammer2_chain_lock(hmp, parent);        /* lock new parent */
747                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
748                 *parentp = parent;                      /* new parent */
749         }
750
751 again2:
752         /*
753          * Locate the blockref array.  Currently we do a fully associative
754          * search through the array.
755          */
756         switch(parent->bref.type) {
757         case HAMMER2_BREF_TYPE_INODE:
758                 base = &parent->data->ipdata.u.blockset.blockref[0];
759                 count = HAMMER2_SET_COUNT;
760                 break;
761         case HAMMER2_BREF_TYPE_INDIRECT:
762                 base = &parent->data->npdata.blockref[0];
763                 count = HAMMER2_IND_COUNT;
764                 break;
765         case HAMMER2_BREF_TYPE_VOLUME:
766                 base = &hmp->voldata.sroot_blockset.blockref[0];
767                 count = HAMMER2_SET_COUNT;
768                 break;
769         default:
770                 panic("hammer2_chain_push: unrecognized blockref type: %d",
771                       parent->bref.type);
772                 base = NULL;    /* safety */
773                 count = 0;      /* safety */
774                 break;
775         }
776         KKASSERT(i <= count);
777
778         /*
779          * Look for the key.  If we are unable to find a match and an exact
780          * match was requested we return NULL.  If a range was requested we
781          * run hammer2_chain_next() to iterate.
782          */
783         bref = NULL;
784         while (i < count) {
785                 tmp = hammer2_chain_find(hmp, parent, i);
786                 bref = (tmp) ? &tmp->bref : &base[i];
787                 if (bref->type == 0) {
788                         ++i;
789                         continue;
790                 }
791                 kprintf("nextxx(%016jx,%d) %d: %016jx/%d\n",
792                         parent->bref.data_off, i,
793                         bref->type,bref->key, bref->keybits);
794                 scan_beg = bref->key;
795                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
796                 if (key_beg <= scan_end && key_end >= scan_beg)
797                         break;
798                 ++i;
799         }
800
801         /*
802          * If we couldn't find a match recurse up a parent to continue the
803          * search.
804          */
805         if (i == count)
806                 goto again;
807
808         /*
809          * Acquire the new chain element.  If the chain element is an
810          * indirect block we must search recursively.
811          */
812         chain = hammer2_chain_get(hmp, parent, i, flags);
813         if (chain == NULL)
814                 return (NULL);
815
816         /*
817          * If the chain element is an indirect block it becomes the new
818          * parent and we loop on it.
819          */
820         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
821                 hammer2_chain_put(hmp, parent);
822                 *parentp = parent = chain;
823                 i = 0;
824                 goto again2;
825         }
826
827         /*
828          * All done, return chain
829          */
830         return (chain);
831 }
832
833 /*
834  * Create and return a new hammer2 system memory structure of the specified
835  * key, type and size and insert it under (parent).  (parent) is typically
836  * acquired as a side effect of issuing a prior lookup.  parent must be locked
837  * and held.
838  *
839  * Non-indirect types will automatically allocate indirect blocks as required
840  * if the new item does not fit in the current (parent).
841  *
842  * Indirect types will move a portion of the existing blockref array in
843  * (parent) into the new indirect type and then use one of the free slots
844  * to emplace the new indirect type.
845  *
846  * A new locked, referenced chain element is returned of the specified type.
847  * This element will also be marked as modified and contain a data area
848  * ready for initialization.
849  */
850 hammer2_chain_t *
851 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
852                      hammer2_key_t key, int keybits, int type, size_t bytes)
853 {
854         hammer2_blockref_t dummy;
855         hammer2_blockref_t *base;
856         hammer2_blockref_t *bref;
857         hammer2_chain_t *chain;
858         hammer2_chain_t dummy_chain;
859         int count;
860         int i;
861         int unlock_parent = 0;
862
863         /*
864          * First allocate media space and construct the dummy bref, then
865          * allocate the in-memory chain structure.
866          */
867         bzero(&dummy, sizeof(dummy));
868         dummy.type = type;
869         dummy.key = key;
870         dummy.keybits = keybits;
871         dummy.data_off = (hammer2_off_t)hammer2_freemap_bytes_to_radix(bytes);
872         chain = hammer2_chain_alloc(hmp, &dummy);
873
874         /*
875          * Recalculate bytes to reflect the actual media block allocation,
876          * then allocate the local memory copy.  This is a new structure
877          * so no I/O is performed.
878          */
879         bytes = (hammer2_off_t)1 <<
880                 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
881
882         switch(type) {
883         case HAMMER2_BREF_TYPE_VOLUME:
884                 panic("hammer2_chain_create: called with volume type");
885                 break;
886         case HAMMER2_BREF_TYPE_INODE:
887                 KKASSERT(bytes == HAMMER2_INODE_BYTES);
888                 chain->data = (void *)&chain->u.ip->ip_data;
889                 break;
890         default:
891                 /* leave chain->data NULL */
892                 KKASSERT(chain->data == NULL);
893                 break;
894         }
895         atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
896
897 again:
898         /*
899          * Locate a free blockref in the parent's array
900          */
901         switch(parent->bref.type) {
902         case HAMMER2_BREF_TYPE_INODE:
903                 KKASSERT(parent->data != NULL);
904                 base = &parent->data->ipdata.u.blockset.blockref[0];
905                 count = HAMMER2_SET_COUNT;
906                 break;
907         case HAMMER2_BREF_TYPE_INDIRECT:
908                 KKASSERT(parent->data != NULL);
909                 base = &parent->data->npdata.blockref[0];
910                 count = HAMMER2_IND_COUNT;
911                 break;
912         case HAMMER2_BREF_TYPE_VOLUME:
913                 KKASSERT(parent->data != NULL);
914                 base = &hmp->voldata.sroot_blockset.blockref[0];
915                 count = HAMMER2_SET_COUNT;
916                 break;
917         default:
918                 panic("hammer2_chain_push: unrecognized blockref type: %d",
919                       parent->bref.type);
920                 count = 0;
921                 break;
922         }
923
924         /*
925          * Scan for an unallocated bref, also skipping any slots occupied
926          * by in-memory chain elements that may not yet have been updated
927          * in the parent's bref array.
928          */
929         bzero(&dummy_chain, sizeof(dummy_chain));
930         bref = NULL;
931         for (i = 0; i < count; ++i) {
932                 bref = &base[i];
933                 dummy_chain.index = i;
934                 if (bref->type == 0 &&
935                     SPLAY_FIND(hammer2_chain_splay,
936                                &parent->shead, &dummy_chain) == NULL) {
937                         break;
938                 }
939         }
940
941         /*
942          * If no free blockref count be found we must create an indirect
943          * block and move a number of blockrefs into it.  With the parent
944          * locked we can safely lock each child in order to move it without
945          * causing a deadlock.
946          *
947          * This may return the new indirect block or the old parent depending
948          * on where the key falls.
949          */
950         if (i == count) {
951                 hammer2_chain_t *nparent;
952
953                 nparent = hammer2_chain_create_indirect(hmp, parent,
954                                                         key, keybits);
955                 if (nparent == NULL) {
956                         hammer2_chain_free(hmp, chain);
957                         chain = NULL;
958                         goto done;
959                 }
960                 if (parent != nparent) {
961                         if (unlock_parent)
962                                 hammer2_chain_put(hmp, parent);
963                         parent = nparent;
964                         unlock_parent = 1;
965                 }
966                 goto again;
967         }
968
969         /*
970          * Link the chain into its parent.
971          */
972         chain->parent = parent;
973         chain->index = i;
974         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
975                 panic("hammer2_chain_link: collision");
976         KKASSERT(parent->refs > 1);
977         atomic_add_int(&parent->refs, 1);
978
979         /*
980          * Additional linkage for inodes.  Reuse the parent pointer to
981          * find the parent directory.
982          */
983         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
984                 hammer2_chain_t *scan = parent;
985                 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
986                         scan = scan->parent;
987                 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
988                         chain->u.ip->pip = scan->u.ip;
989         }
990
991         /*
992          * Mark the newly created chain element as modified and fully
993          * resolve the chain->data pointer.
994          *
995          * Chain elements with embedded data will not issue I/O at this time.
996          * A new block will be allocated for the buffer but not instantiated.
997          *
998          * Chain elements which do not use embedded data will allocate
999          * the new block AND instantiate its buffer cache buffer, pointing
1000          * the data at the bp.
1001          */
1002         hammer2_chain_modify(hmp, chain);
1003
1004 done:
1005         if (unlock_parent)
1006                 hammer2_chain_put(hmp, parent);
1007         return (chain);
1008 }
1009
1010 /*
1011  * Create an indirect block that covers one or more of the elements in the
1012  * current parent.  Either returns the existing parent with no locking or
1013  * ref changes or returns the new indirect block locked and referenced,
1014  * depending on what the specified key falls into.
1015  *
1016  * The key/keybits for the indirect mode only needs to follow three rules:
1017  *
1018  * (1) That all elements underneath it fit within its key space and
1019  *
1020  * (2) That all elements outside it are outside its key space.
1021  *
1022  * (3) When creating the new indirect block any elements in the current
1023  *     parent that fit within the new indirect block's keyspace must be
1024  *     moved into the new indirect block.
1025  *
1026  * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1027  *     keyspace the the current parent, but lookup/iteration rules will
1028  *     ensure (and must ensure) that rule (2) for all parents leading up
1029  *     to the nearest inode or the root volume header is adhered to.  This
1030  *     is accomplished by always recursing through matching keyspaces in
1031  *     the hammer2_chain_lookup() and hammer2_chain_next() API.
1032  *
1033  * The current implementation calculates the current worst-case keyspace by
1034  * iterating the current parent and then divides it into two halves, choosing
1035  * whichever half has the most elements (not necessarily the half containing
1036  * the requested key).
1037  *
1038  * We can also opt to use the half with the least number of elements.  This
1039  * causes lower-numbered keys (aka logical file offsets) to recurse through
1040  * fewer indirect blocks and higher-numbered keys to recurse through more.
1041  * This also has the risk of not moving enough elements to the new indirect
1042  * block and being forced to create several indirect blocks before the element
1043  * can be inserted.
1044  */
1045 static
1046 hammer2_chain_t *
1047 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1048                               hammer2_key_t create_key, int create_bits)
1049 {
1050         hammer2_blockref_t *base;
1051         hammer2_blockref_t *bref;
1052         hammer2_chain_t *chain;
1053         hammer2_chain_t *ichain;
1054         hammer2_chain_t dummy;
1055         hammer2_key_t key = create_key;
1056         int keybits = create_bits;
1057         int locount = 0;
1058         int hicount = 0;
1059         int count;
1060         int i;
1061
1062         kprintf("create_indirect1\n");
1063
1064         /*
1065          * Mark the parent modified so our base[] pointer remains valid
1066          * while we move entries.
1067          */
1068         hammer2_chain_modify(hmp, parent);
1069
1070         /*
1071          * Locate a free blockref in the parent's array
1072          */
1073         switch(parent->bref.type) {
1074         case HAMMER2_BREF_TYPE_INODE:
1075                 base = &parent->data->ipdata.u.blockset.blockref[0];
1076                 count = HAMMER2_SET_COUNT;
1077                 break;
1078         case HAMMER2_BREF_TYPE_INDIRECT:
1079                 base = &parent->data->npdata.blockref[0];
1080                 count = HAMMER2_IND_COUNT;
1081                 break;
1082         case HAMMER2_BREF_TYPE_VOLUME:
1083                 base = &hmp->voldata.sroot_blockset.blockref[0];
1084                 count = HAMMER2_SET_COUNT;
1085                 break;
1086         default:
1087                 panic("hammer2_chain_push: unrecognized blockref type: %d",
1088                       parent->bref.type);
1089                 count = 0;
1090                 break;
1091         }
1092
1093         /*
1094          * Scan for an unallocated bref, also skipping any slots occupied
1095          * by in-memory chain elements that may not yet have been updated
1096          * in the parent's bref array.
1097          */
1098         bzero(&dummy, sizeof(dummy));
1099         for (i = 0; i < count; ++i) {
1100                 int nkeybits;
1101
1102                 bref = &base[i];
1103                 if (bref->type == 0) {
1104                         dummy.index = i;
1105                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1106                                            &dummy);
1107                         if (chain == NULL)
1108                                 continue;
1109                         bref = &chain->bref;
1110                 }
1111
1112                 /*
1113                  * Expand are calculated key range (key, keybits) to fit
1114                  * the scanned key.
1115                  */
1116                 nkeybits = keybits;
1117                 if (nkeybits < bref->keybits)
1118                         nkeybits = bref->keybits;
1119                 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1120                         (key ^ bref->key)) != 0) {
1121                         ++nkeybits;
1122                 }
1123
1124                 /*
1125                  * If the new key range is larger we have to determine
1126                  * which side of the new key range the existing keys fall
1127                  * under by checking the high bit, then collapsing the
1128                  * locount into the hicount or vise-versa.
1129                  */
1130                 if (keybits != nkeybits) {
1131                         if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1132                                 hicount += locount;
1133                                 locount = 0;
1134                         } else {
1135                                 locount += hicount;
1136                                 hicount = 0;
1137                         }
1138                         keybits = nkeybits;
1139                 }
1140
1141                 /*
1142                  * The newly scanned key will be in the lower half or the
1143                  * higher half of the (new) key range.
1144                  */
1145                 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1146                         ++hicount;
1147                 else
1148                         ++locount;
1149                 kprintf("consolidate %016jx keybits %d lo %d hi %d\n",
1150                         bref->key, keybits, locount, hicount);
1151         }
1152
1153         /*
1154          * The key for the indirect block will be the lower half or
1155          * the upper half of the above calculated keyspace.
1156          */
1157         key &= ~(((hammer2_key_t)1 << keybits) - 1);
1158         if (hammer2_indirect_optimize) {
1159                 /*
1160                  * Insert node for least number of keys, best for linear
1161                  * files (?)  XXX won't work if least number is 0.
1162                  */
1163                 panic("hammer2_indirect_optimize not working yet");
1164                 if (hicount < locount)
1165                         key |= (hammer2_key_t)1 << (keybits - 1);
1166         } else {
1167                 /*
1168                  * Insert node for most number of keys, best for heavily
1169                  * fragmented files.
1170                  */
1171                 if (hicount > locount)
1172                         key |= (hammer2_key_t)1 << (keybits - 1);
1173         }
1174
1175         /*
1176          * Ok, create our new indirect block
1177          */
1178         dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1179         dummy.bref.key = key;
1180         dummy.bref.keybits = keybits - 1;
1181         dummy.bref.data_off = (hammer2_off_t)
1182                             hammer2_freemap_bytes_to_radix(HAMMER2_PBUFSIZE);
1183         dummy.index = -1;       /* not yet assigned */
1184         ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1185         kprintf("create_indirect2: allocate %016jx/%d\n", key, keybits);
1186
1187         /*
1188          * Iterate the original parent and move the matching brefs into
1189          * the new indirect block.  All the keys are inclusive of keybits
1190          * so we only have to check bit (keybits - 1).
1191          */
1192         for (i = 0; i < count; ++i) {
1193                 bref = &base[i];
1194                 if (bref->type == 0) {
1195                         dummy.index = i;
1196                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1197                                            &dummy);
1198                         if (chain == NULL) {
1199                                 /*
1200                                  * Select index indirect block is placed in
1201                                  */
1202                                 if (ichain->index < 0)
1203                                         ichain->index = i;
1204                                 continue;
1205                         }
1206                         kprintf("pre-move index %d\n", i);
1207                         bref = &chain->bref;
1208                 }
1209
1210                 /*
1211                  * Skip keys not in the chosen half (low or high), only bit
1212                  * (keybits - 1) needs to be compared but for safety we
1213                  * will compare all msb bits plus that bit again.
1214                  */
1215                 if ((~(((hammer2_key_t)1 << (keybits - 1)) - 1) &
1216                     (key ^ bref->key)) != 0) {
1217                         continue;
1218                 }
1219
1220                 /*
1221                  * This element is being moved, its slot is available
1222                  * for our indirect block.
1223                  */
1224                 kprintf("move index %d\n", i);
1225                 if (ichain->index < 0)
1226                         ichain->index = i;
1227                 bzero(&base[i], sizeof(base[i]));
1228
1229                 /*
1230                  * Load the new indirect block by acquiring or allocating
1231                  * the related chain entries, then simply move it to the
1232                  * new parent (ichain).
1233                  *
1234                  * Flagging the new chain entry MOVED will cause a flush
1235                  * to synchronize its block into the new indirect block.
1236                  * We must still set SUBMODIFIED but we do that after the
1237                  * loop.
1238                  */
1239                 chain = hammer2_chain_get(hmp, parent, i,
1240                                           HAMMER2_LOOKUP_NOLOCK);
1241                 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1242                 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1243                         panic("hammer2_chain_create_indirect: collision");
1244                 chain->parent = ichain;
1245                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1246                 atomic_add_int(&parent->refs, -1);
1247                 atomic_add_int(&ichain->refs, 1);
1248                 hammer2_chain_drop(hmp, chain);
1249                 KKASSERT(parent->refs > 0);
1250                 chain = NULL;
1251         }
1252
1253         /*
1254          * Insert the new indirect block into the parent now that we've
1255          * cleared out some entries in the parent.  We calculated a good
1256          * insertion index in the loop above (ichain->index).
1257          */
1258         KKASSERT(ichain->index >= 0);
1259         kprintf("insert ichain at %d\n", ichain->index);
1260         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1261                 panic("hammer2_chain_create_indirect: ichain insertion");
1262         ichain->parent = parent;
1263         atomic_add_int(&parent->refs, 1);
1264
1265         /*
1266          * Mark the new indirect block modified after insertion, which
1267          * will propagate up through parent all the way to the root and
1268          * also allocate the physical block in ichain for our caller.
1269          *
1270          * We have to set SUBMODIFIED in ichain's flags manually so the
1271          * flusher knows it has to recurse through it to get to all of
1272          * our moved blocks.
1273          */
1274         hammer2_chain_modify(hmp, ichain);
1275         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1276
1277         /*
1278          * Figure out what to return.
1279          */
1280         if (create_bits >= keybits) {
1281                 /*
1282                  * Key being created is way outside the key range,
1283                  * return the original parent.
1284                  */
1285                 hammer2_chain_put(hmp, ichain);
1286         } else if (~(((hammer2_key_t)1 << (keybits - 1)) - 1) &
1287                    (create_key ^ key)) {
1288                 /*
1289                  * Key being created is outside the key range,
1290                  * return the original parent.
1291                  */
1292                 hammer2_chain_put(hmp, ichain);
1293         } else {
1294                 /*
1295                  * Otherwise its in the range, return the new parent.
1296                  */
1297                 parent = ichain;
1298         }
1299
1300         kprintf("create_indirect9\n");
1301
1302         return(parent);
1303 }
1304
1305 /*
1306  * Physically delete the specified chain element.  Note that inodes with
1307  * open descriptors should not be deleted (as with other filesystems) until
1308  * the last open descriptor is closed.
1309  *
1310  * This routine will remove the chain element from its parent and potentially
1311  * also recurse upward and delete indirect blocks which become empty as a
1312  * side effect.
1313  *
1314  * The caller must pass a pointer to the chain's parent, also locked and
1315  * referenced.  (*parentp) will be modified in a manner similar to a lookup
1316  * or iteration when indirect blocks are also deleted as a side effect.
1317  */
1318 void
1319 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
1320                      hammer2_chain_t *chain)
1321 {
1322 }
1323
1324 /*
1325  * Recursively flush the specified chain.  The chain is locked and
1326  * referenced by the caller and will remain so on return.
1327  *
1328  * This cannot be called with the volume header's vchain
1329  */
1330 void
1331 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
1332                     hammer2_blockref_t *parent_bref)
1333 {
1334         /*
1335          * Flush any children of this chain entry.
1336          */
1337         if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1338                 hammer2_blockref_t *base;
1339                 hammer2_blockref_t bref;
1340                 hammer2_chain_t *scan;
1341                 hammer2_chain_t *next;
1342                 int count;
1343                 int submodified = 0;
1344
1345                 /*
1346                  * Modifications to the children will propagate up, forcing
1347                  * us to become modified and copy-on-write too.
1348                  */
1349                 hammer2_chain_modify(hmp, chain);
1350
1351                 /*
1352                  * The blockref in the parent's array must be repointed at
1353                  * the new block allocated by the child after its flush.
1354                  *
1355                  * Calculate the base of the array.
1356                  */
1357                 switch(chain->bref.type) {
1358                 case HAMMER2_BREF_TYPE_INODE:
1359                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1360                         base = &chain->data->ipdata.u.blockset.blockref[0];
1361                         count = HAMMER2_SET_COUNT;
1362                         break;
1363                 case HAMMER2_BREF_TYPE_INDIRECT:
1364                         base = &chain->data->npdata.blockref[0];
1365                         count = HAMMER2_IND_COUNT;
1366                         break;
1367                 case HAMMER2_BREF_TYPE_VOLUME:
1368                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1369                         base = &hmp->voldata.sroot_blockset.blockref[0];
1370                         count = HAMMER2_SET_COUNT;
1371                         break;
1372                 default:
1373                         base = NULL;
1374                         panic("hammer2_chain_get: unrecognized blockref type: %d",
1375                               chain->bref.type);
1376                 }
1377
1378                 /*
1379                  * Flush the children and update the blockrefs in the parent.
1380                  * Be careful of ripouts during the loop.
1381                  */
1382                 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1383                 while ((scan = next) != NULL) {
1384                         next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1385                                           scan);
1386                         if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1387                                            HAMMER2_CHAIN_MODIFIED |
1388                                            HAMMER2_CHAIN_MOVED)) {
1389                                 hammer2_chain_ref(hmp, scan);
1390                                 hammer2_chain_lock(hmp, scan);
1391                                 bref = base[scan->index];
1392                                 hammer2_chain_flush(hmp, scan, &bref);
1393                                 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1394                                                    HAMMER2_CHAIN_MODIFIED)) {
1395                                         submodified = 1;
1396                                         kprintf("flush race, sub dirty\n");
1397                                 } else {
1398                                         KKASSERT(scan->index < count);
1399                                         base[scan->index] = bref;
1400                                         atomic_clear_int(&scan->flags,
1401                                                          HAMMER2_CHAIN_MOVED);
1402                                 }
1403                                 hammer2_chain_put(hmp, scan);
1404                         }
1405                 }
1406                 if (submodified == 0) {
1407                         atomic_clear_int(&chain->flags,
1408                                          HAMMER2_CHAIN_SUBMODIFIED);
1409                 }
1410         }
1411
1412         /*
1413          * Flush this chain entry only if it is marked modified.
1414          *
1415          * If the chain entry was moved we must still updated *parent_bref
1416          * or the indirect block won't be adjusted to point to us.
1417          */
1418         if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
1419                 if (chain->flags & HAMMER2_CHAIN_MOVED)
1420                         *parent_bref = chain->bref;
1421                 return;
1422         }
1423
1424         /*
1425          * If this is part of a recursive flush we can go ahead and write
1426          * out the buffer cache buffer and pass a new bref back up the chain.
1427          *
1428          * This will never be a volume header.
1429          */
1430         if (parent_bref) {
1431                 hammer2_blockref_t *bref;
1432                 hammer2_off_t off_hi;
1433                 struct buf *bp;
1434                 size_t off_lo;
1435                 size_t bytes;
1436                 int error;
1437
1438                 KKASSERT(chain->data != NULL);
1439                 bref = &chain->bref;
1440
1441                 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1442                 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1443                 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1444                 KKASSERT(off_hi != 0);  /* not the root volume header */
1445
1446                 if (chain->bp) {
1447                         /*
1448                          * The data is mapped directly to the bp and will be
1449                          * written out when the chain is unlocked by the
1450                          * parent.  However, since we are clearing the
1451                          * MODIFIED flag we have to set the FLUSHED flag
1452                          * so the hammer2_chain_unlock() code knows to
1453                          * bdwrite() the buffer.
1454                          */
1455                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSHED);
1456                 } else {
1457                         /*
1458                          * The data is embedded, we have to acquire the
1459                          * buffer cache buffer and copy the data into it.
1460                          */
1461                         bp = NULL;
1462                         error = bread(hmp->devvp, off_hi,
1463                                       HAMMER2_PBUFSIZE, &bp);
1464                         KKASSERT(error == 0); /* XXX */
1465
1466                         /*
1467                          * Copy the data to the buffer, mark the buffer
1468                          * dirty, and convert the chain to unmodified.
1469                          */
1470                         bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1471                         bdwrite(bp);
1472                         bp = NULL;
1473
1474                         chain->bref.check.iscsi32.value =
1475                                         hammer2_icrc32(chain->data, bytes);
1476                 }
1477
1478                 /*
1479                  * Return information on the new block to the parent.
1480                  */
1481                 *parent_bref = chain->bref;
1482                 hammer2_chain_drop(hmp, chain); /* drop ref tracking mod bit */
1483                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
1484         } else {
1485                 hammer2_blockref_t *bref;
1486
1487                 KKASSERT(chain->data != NULL);
1488                 KKASSERT(chain->bp == NULL);
1489                 bref = &chain->bref;
1490
1491                 switch(bref->type) {
1492                 case HAMMER2_BREF_TYPE_VOLUME:
1493                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1494                                 hammer2_icrc32(
1495                                         (char *)&hmp->voldata +
1496                                          HAMMER2_VOLUME_ICRC1_OFF,
1497                                         HAMMER2_VOLUME_ICRC1_SIZE);
1498                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1499                                 hammer2_icrc32(
1500                                         (char *)&hmp->voldata +
1501                                          HAMMER2_VOLUME_ICRC0_OFF,
1502                                         HAMMER2_VOLUME_ICRC0_SIZE);
1503                         hmp->voldata.icrc_volheader =
1504                                 hammer2_icrc32(
1505                                         (char *)&hmp->voldata +
1506                                          HAMMER2_VOLUME_ICRCVH_OFF,
1507                                         HAMMER2_VOLUME_ICRCVH_SIZE);
1508                         break;
1509                 }
1510         }
1511 }