/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/uuid.h>
#include <sys/vfsops.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/objcache.h>

#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include <sys/mutex.h>
#include <sys/mutex2.h>

#include "hammer2.h"
#include "hammer2_disk.h"
#include "hammer2_mount.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define REPORT_REFS_ERRORS 1    /* XXX remove me */

MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");

struct hammer2_sync_info {
        hammer2_trans_t trans;
        int error;
        int waitfor;
};

TAILQ_HEAD(hammer2_mntlist, hammer2_dev);
TAILQ_HEAD(hammer2_pfslist, hammer2_pfs);
static struct hammer2_mntlist hammer2_mntlist;
static struct hammer2_pfslist hammer2_pfslist;
static struct lock hammer2_mntlk;

int hammer2_debug;
int hammer2_cluster_enable = 1;
int hammer2_hardlink_enable = 1;
int hammer2_flush_pipe = 100;
int hammer2_synchronous_flush = 1;
int hammer2_dio_count;
long hammer2_limit_dirty_chains;
long hammer2_iod_file_read;
long hammer2_iod_meta_read;
long hammer2_iod_indr_read;
long hammer2_iod_fmap_read;
long hammer2_iod_volu_read;
long hammer2_iod_file_write;
long hammer2_iod_meta_write;
long hammer2_iod_indr_write;
long hammer2_iod_fmap_write;
long hammer2_iod_volu_write;
long hammer2_ioa_file_read;
long hammer2_ioa_meta_read;
long hammer2_ioa_indr_read;
long hammer2_ioa_fmap_read;
long hammer2_ioa_volu_read;
long hammer2_ioa_fmap_write;
long hammer2_ioa_file_write;
long hammer2_ioa_meta_write;
long hammer2_ioa_indr_write;
long hammer2_ioa_volu_write;

MALLOC_DECLARE(C_BUFFER);
MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression.");

MALLOC_DECLARE(D_BUFFER);
MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression.");

SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");

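/*
 * The knobs below appear under the vfs.hammer2 sysctl tree, e.g.
 * "sysctl vfs.hammer2.debug=1" (illustrative usage).
 */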
SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW,
           &hammer2_debug, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW,
           &hammer2_cluster_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW,
           &hammer2_hardlink_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW,
           &hammer2_flush_pipe, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW,
           &hammer2_synchronous_flush, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW,
           &hammer2_limit_dirty_chains, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, dio_count, CTLFLAG_RD,
           &hammer2_dio_count, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW,
           &hammer2_iod_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW,
           &hammer2_iod_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW,
           &hammer2_iod_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW,
           &hammer2_iod_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW,
           &hammer2_iod_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW,
           &hammer2_iod_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW,
           &hammer2_iod_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW,
           &hammer2_iod_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
           &hammer2_iod_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
           &hammer2_iod_volu_write, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW,
           &hammer2_ioa_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW,
           &hammer2_ioa_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW,
           &hammer2_ioa_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW,
           &hammer2_ioa_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW,
           &hammer2_ioa_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
           &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
           &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
           &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
           &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
           &hammer2_ioa_volu_write, 0, "");

static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                struct ucred *cred);
static int hammer2_remount(hammer2_dev_t *, struct mount *, char *,
                                struct vnode *, struct ucred *);
static int hammer2_recovery(hammer2_dev_t *hmp);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
                                struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
                                struct ucred *cred);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
                                ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
                                struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                                int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_dev_t *hmp);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

static void hammer2_update_pmps(hammer2_dev_t *hmp);
static void hammer2_write_thread(void *arg);

static void hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp);
static void hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp,
                                hammer2_dev_t *hmp);

/*
 * Functions supporting compression in threads, used by the strategy
 * write path (originally in hammer2_vnops.c).
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                const hammer2_inode_data_t *ripdata,
                                hammer2_cluster_t *cparent,
                                hammer2_key_t lbase, int ioflag, int pblksize,
                                int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                const hammer2_inode_data_t *ripdata,
                                hammer2_cluster_t *cparent,
                                hammer2_key_t lbase, int ioflag,
                                int pblksize, int *errorp,
                                int comp_algo, int check_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
                                hammer2_trans_t *trans, hammer2_inode_t *ip,
                                const hammer2_inode_data_t *ripdata,
                                hammer2_cluster_t *cparent,
                                hammer2_key_t lbase,
                                int ioflag, int pblksize, int *errorp,
                                int check_algo);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                const hammer2_inode_data_t *ripdata,
                                hammer2_cluster_t *cparent,
                                hammer2_key_t lbase,
                                int *errorp);
static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
                                int ioflag, int pblksize, int *errorp,
                                int check_algo);

/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
        .vfs_init       = hammer2_vfs_init,
        .vfs_uninit     = hammer2_vfs_uninit,
        .vfs_sync       = hammer2_vfs_sync,
        .vfs_mount      = hammer2_vfs_mount,
        .vfs_unmount    = hammer2_vfs_unmount,
        .vfs_root       = hammer2_vfs_root,
        .vfs_statfs     = hammer2_vfs_statfs,
        .vfs_statvfs    = hammer2_vfs_statvfs,
        .vfs_vget       = hammer2_vfs_vget,
        .vfs_vptofh     = hammer2_vfs_vptofh,
        .vfs_fhtovp     = hammer2_vfs_fhtovp,
        .vfs_checkexp   = hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

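/*
 * VFS_SET() registers hammer2_vfsops under the filesystem name
 * "hammer2" so the kernel can resolve it at mount time, and
 * MODULE_VERSION() tags the module for the loader.
 */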
VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

static
int
hammer2_vfs_init(struct vfsconf *conf)
{
        static struct objcache_malloc_args margs_read;
        static struct objcache_malloc_args margs_write;

        int error;

        error = 0;

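        /*
         * Sanity-check that the compiled structure sizes match the
         * on-media layout constants; refuse to continue on a mismatch
         * since the media format would be misinterpreted.
         */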
        if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
                error = EINVAL;
        if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
                error = EINVAL;
        if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
                error = EINVAL;

        if (error)
                kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

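        /*
         * Object caches backing the compression support.  The read side
         * (decompression target) uses 64KB buffers, matching the largest
         * logical buffer; the write side uses 32KB, evidently because a
         * compressed block is only worth keeping when it shrinks the data
         * to half size or less.
         */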
        margs_read.objsize = 65536;
        margs_read.mtype = D_BUFFER;

        margs_write.objsize = 32768;
        margs_write.mtype = C_BUFFER;

        cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
                                0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
                                objcache_malloc_free, &margs_read);
        cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
                                0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
                                objcache_malloc_free, &margs_write);

        lockinit(&hammer2_mntlk, "mntlk", 0, 0);
        TAILQ_INIT(&hammer2_mntlist);
        TAILQ_INIT(&hammer2_pfslist);

        hammer2_limit_dirty_chains = desiredvnodes / 10;

        return (error);
}

static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
        objcache_destroy(cache_buffer_read);
        objcache_destroy(cache_buffer_write);
        return 0;
}

/*
 * Core PFS allocator.  Used to allocate the pmp structure for PFS cluster
 * mounts and the spmp structure for media (hmp) structures.
 *
 * pmp->modify_tid tracks new modify_tid transaction ids for front-end
 * transactions.  Note that synchronization does not use this field.
 * (typically frontend operations and synchronization cannot run on the
 * same PFS node at the same time).
 *
 * XXX check locking
 */
hammer2_pfs_t *
hammer2_pfsalloc(hammer2_cluster_t *cluster,
                 const hammer2_inode_data_t *ripdata,
                 hammer2_tid_t modify_tid)
{
        hammer2_chain_t *rchain;
        hammer2_inode_t *iroot;
        hammer2_pfs_t *pmp;
        int count;
        int i;
        int j;

        /*
         * Locate or create the PFS based on the cluster id.  If ripdata
         * is NULL this is an spmp, which is unique and is always allocated.
         */
        if (ripdata) {
                TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
                        if (bcmp(&pmp->pfs_clid, &ripdata->pfs_clid,
                                 sizeof(pmp->pfs_clid)) == 0) {
                                break;
                        }
                }
        } else {
                pmp = NULL;
        }

        if (pmp == NULL) {
                pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
                hammer2_trans_manage_init(&pmp->tmanage);
                kmalloc_create(&pmp->minode, "HAMMER2-inodes");
                kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
                lockinit(&pmp->lock, "pfslk", 0, 0);
                spin_init(&pmp->inum_spin, "hm2pfsalloc_inum");
                RB_INIT(&pmp->inum_tree);
                TAILQ_INIT(&pmp->unlinkq);
                spin_init(&pmp->list_spin, "hm2pfsalloc_list");

                /*
                 * Save last media transaction id for flusher.
                 */
                pmp->modify_tid = modify_tid;
                if (ripdata) {
                        pmp->inode_tid = ripdata->pfs_inum + 1;
                        pmp->pfs_clid = ripdata->pfs_clid;
                }
                hammer2_mtx_init(&pmp->wthread_mtx, "h2wthr");
                bioq_init(&pmp->wthread_bioq);
                TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry);

                /*
                 * The synchronization thread may start too early; make
                 * sure it stays frozen until we are ready to let it go.
                 * XXX
                 */
                /*
                pmp->primary_thr.flags = HAMMER2_SYNCTHR_FROZEN |
                                         HAMMER2_SYNCTHR_REMASTER;
                */
        }

        /*
         * Create the PFS's root inode.
         */
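        /*
         * NOTE: hammer2_inode_get() returns a locked inode; we take the
         *       long-lived ref for the pmp->iroot pointer before
         *       unlocking it.
         */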
        if ((iroot = pmp->iroot) == NULL) {
                iroot = hammer2_inode_get(pmp, NULL, NULL);
                pmp->iroot = iroot;
                hammer2_inode_ref(iroot);
                hammer2_inode_unlock(iroot, NULL);
        }

        /*
         * Stop here if no cluster is passed in.
         */
        if (cluster == NULL)
                goto done;

        /*
         * When a cluster is passed in we must add the cluster's chains
         * to the PFS's root inode, update pmp->pfs_types[], and update
         * the synchronization threads.
         *
         * At the moment empty spots can develop due to removals or failures.
         * Ultimately we want to re-fill these spots but doing so might
         * confuse running code. XXX
         */
        hammer2_inode_ref(iroot);
        hammer2_mtx_ex(&iroot->lock);
        j = iroot->cluster.nchains;

        kprintf("add PFS to pmp %p[%d]\n", pmp, j);

        for (i = 0; i < cluster->nchains; ++i) {
                if (j == HAMMER2_MAXCLUSTER)
                        break;
                rchain = cluster->array[i].chain;
                KKASSERT(rchain->pmp == NULL);
                rchain->pmp = pmp;
                hammer2_chain_ref(rchain);
                iroot->cluster.array[j].chain = rchain;
                pmp->pfs_types[j] = ripdata->pfs_type;
                pmp->pfs_names[j] = kstrdup(ripdata->filename, M_HAMMER2);

                /*
                 * If the PFS is already mounted we must account
                 * for the mount_count here.
                 */
                if (pmp->mp)
                        ++rchain->hmp->mount_count;

                /*
                 * May have to fixup dirty chain tracking.  Previous
                 * pmp was NULL so nothing to undo.
                 */
                if (rchain->flags & HAMMER2_CHAIN_MODIFIED)
                        hammer2_pfs_memory_inc(pmp);
                ++j;
        }
        iroot->cluster.nchains = j;

        if (i != cluster->nchains) {
                kprintf("hammer2_mount: cluster full!\n");
                /* XXX fatal error? */
        }

        /*
         * Update nmasters from any PFS inode which is part of the cluster.
         * It is possible that this will result in a value which is too
         * high.  MASTER PFSs are authoritative for pfs_nmasters and will
         * override this value later on.
         *
         * (This informs us of masters that might not currently be
         *  discoverable by this mount).
         */
        if (ripdata && pmp->pfs_nmasters < ripdata->pfs_nmasters) {
                pmp->pfs_nmasters = ripdata->pfs_nmasters;
        }

        /*
         * Count visible masters.  Masters are usually added with
         * ripdata->pfs_nmasters set to 1.  This detects when there
         * are more (XXX and must update the master inodes).
         */
        count = 0;
        for (i = 0; i < iroot->cluster.nchains; ++i) {
                if (pmp->pfs_types[i] == HAMMER2_PFSTYPE_MASTER)
                        ++count;
        }
        if (pmp->pfs_nmasters < count)
                pmp->pfs_nmasters = count;

        /*
         * Create missing synchronization threads.
         *
         * Single-node masters (including snapshots) have nothing to
         * synchronize and do not require this thread.
         *
         * Multi-node masters or any number of soft masters, slaves, copy,
         * or other PFS types need the thread.
         *
         * Each thread is responsible for its particular cluster index.
         * We use independent threads so stalls or mismatches related to
         * any given target do not affect other targets.
         */
        for (i = 0; i < iroot->cluster.nchains; ++i) {
                if (pmp->sync_thrs[i].td)
                        continue;
                if ((pmp->pfs_nmasters > 1 &&
                     (pmp->pfs_types[i] == HAMMER2_PFSTYPE_MASTER)) ||
                    pmp->pfs_types[i] != HAMMER2_PFSTYPE_MASTER) {
                        hammer2_syncthr_create(&pmp->sync_thrs[i], pmp, i,
                                               hammer2_syncthr_primary);
                }
        }
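
        /*
         * NOTE: the condition above reduces to "any non-MASTER node, or
         *       a MASTER node when the cluster has more than one master",
         *       matching the policy described in the comment preceding
         *       the loop.
         */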

        hammer2_mtx_unlock(&iroot->lock);
        hammer2_inode_drop(iroot);
done:
        return pmp;
}

/*
 * Destroy a PFS, typically only occurs after the last mount on a device
 * has gone away.
 */
static void
hammer2_pfsfree(hammer2_pfs_t *pmp)
{
        hammer2_inode_t *iroot;
        int i;

        /*
         * Clean up our reference on iroot.  iroot is not (and should not
         * be) needed by the flush code.
         */
        TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);

        iroot = pmp->iroot;
        if (iroot) {
                for (i = 0; i < iroot->cluster.nchains; ++i)
                        hammer2_syncthr_delete(&pmp->sync_thrs[i]);
#if REPORT_REFS_ERRORS
                if (pmp->iroot->refs != 1)
                        kprintf("PMP->IROOT %p REFS WRONG %d\n",
                                pmp->iroot, pmp->iroot->refs);
#else
                KKASSERT(pmp->iroot->refs == 1);
#endif
                /* ref for pmp->iroot */
                hammer2_inode_drop(pmp->iroot);
                pmp->iroot = NULL;
        }

        kmalloc_destroy(&pmp->mmsg);
        kmalloc_destroy(&pmp->minode);

        kfree(pmp, M_HAMMER2);
}

/*
 * Remove all references to hmp from the pfs list.  Any PFS which becomes
 * empty is terminated and freed.
 *
 * XXX inefficient.
 */
static void
hammer2_pfsfree_scan(hammer2_dev_t *hmp)
{
        hammer2_pfs_t *pmp;
        hammer2_inode_t *iroot;
        hammer2_cluster_t *cluster;
        hammer2_chain_t *rchain;
        int didfreeze;
        int i;

again:
        TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
                if ((iroot = pmp->iroot) == NULL)
                        continue;
                if (hmp->spmp == pmp) {
                        kprintf("unmount hmp %p remove spmp %p\n",
                                hmp, pmp);
                        hmp->spmp = NULL;
                }

                /*
                 * Determine if this PFS is affected.  If it is we must
                 * freeze all management threads and lock its iroot.
                 *
                 * Freezing a management thread forces it idle; operations
                 * in progress will be aborted and it will have to start
                 * over again when unfrozen, or exit if told to exit.
                 */
                cluster = &iroot->cluster;
                for (i = 0; i < cluster->nchains; ++i) {
                        rchain = cluster->array[i].chain;
                        if (rchain == NULL || rchain->hmp != hmp)
                                continue;
                        break;
                }
                if (i != cluster->nchains) {
                        /*
                         * Make sure all synchronization threads are locked
                         * down.
                         */
                        for (i = 0; i < iroot->cluster.nchains; ++i)
                                hammer2_syncthr_freeze(&pmp->sync_thrs[i]);

                        /*
                         * Lock the inode and clean out matching chains.
                         * Note that we cannot use hammer2_inode_lock_*()
                         * here because that would attempt to validate the
                         * cluster that we are in the middle of ripping
                         * apart.
                         *
                         * WARNING! We are working directly on the inode's
                         *          embedded cluster.
                         */
                        hammer2_mtx_ex(&iroot->lock);

                        /*
                         * Remove the chain from matching elements of the PFS.
                         */
                        for (i = 0; i < cluster->nchains; ++i) {
                                rchain = cluster->array[i].chain;
                                if (rchain == NULL || rchain->hmp != hmp)
                                        continue;
                                hammer2_syncthr_delete(&pmp->sync_thrs[i]);
                                rchain = cluster->array[i].chain;
                                cluster->array[i].chain = NULL;
                                pmp->pfs_types[i] = 0;
                                if (pmp->pfs_names[i]) {
                                        kfree(pmp->pfs_names[i], M_HAMMER2);
                                        pmp->pfs_names[i] = NULL;
                                }
                                hammer2_chain_drop(rchain);

                                /* focus hint */
                                if (cluster->focus == rchain)
                                        cluster->focus = NULL;
                        }
                        hammer2_mtx_unlock(&iroot->lock);
                        didfreeze = 1;  /* remaster, unfreeze down below */
                } else {
                        didfreeze = 0;
                }

                /*
                 * Cleanup trailing chains.  Do not reorder chains (for now).
                 * XXX might remove more than we intended.
                 */
                while (i > 0) {
                        if (cluster->array[i - 1].chain)
                                break;
                        --i;
                }
                cluster->nchains = i;

                /*
                 * If the PMP has no elements remaining we can destroy it.
                 * (this will transition management threads from frozen->exit).
                 */
                if (cluster->nchains == 0) {
                        kprintf("unmount hmp %p last ref to PMP=%p\n",
                                hmp, pmp);
                        hammer2_pfsfree(pmp);
                        goto again;
                }

                /*
                 * If elements still remain we need to set the REMASTER
                 * flag and unfreeze it.
                 */
                if (didfreeze) {
                        for (i = 0; i < iroot->cluster.nchains; ++i) {
                                hammer2_syncthr_remaster(&pmp->sync_thrs[i]);
                                hammer2_syncthr_unfreeze(&pmp->sync_thrs[i]);
                        }
                }
        }
}

/*
 * Mount or remount a HAMMER2 filesystem from physical media.
 *
 *      mountroot
 *              mp              mount point structure
 *              path            NULL
 *              data            <unused>
 *              cred            <unused>
 *
 *      mount
 *              mp              mount point structure
 *              path            path to mount point
 *              data            pointer to argument structure in user space
 *                      volume  volume path (device@LABEL form)
 *                      hflags  user mount flags
 *              cred            user credentials
 *
 * RETURNS:     0       Success
 *              !0      error number
 */
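/*
 * A typical invocation from userland, via mount_hammer2(8), looks
 * something like this (illustrative example only):
 *
 *      mount_hammer2 /dev/ad0s1a@ROOT /mnt
 *
 * The portion before the '@' names the block device; the portion after
 * it names the PFS label to mount.
 */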
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                  struct ucred *cred)
{
        struct hammer2_mount_info info;
        hammer2_pfs_t *pmp;
        hammer2_pfs_t *spmp;
        hammer2_dev_t *hmp;
        hammer2_key_t key_next;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        struct vnode *devvp;
        struct nlookupdata nd;
        hammer2_chain_t *parent;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *cparent;
        const hammer2_inode_data_t *ripdata;
        hammer2_blockref_t bref;
        struct file *fp;
        char devstr[MNAMELEN];
        size_t size;
        size_t done;
        char *dev;
        char *label;
        int ronly = 1;
        int error;
        int cache_index;
        int i;

        hmp = NULL;
        pmp = NULL;
        dev = NULL;
        label = NULL;
        devvp = NULL;
        cache_index = -1;

        kprintf("hammer2_mount\n");

        if (path == NULL) {
                /*
                 * Root mount
                 */
                bzero(&info, sizeof(info));
                info.cluster_fd = -1;
                return (EOPNOTSUPP);
        } else {
                /*
                 * Non-root mount or updating a mount
                 */
                error = copyin(data, &info, sizeof(info));
                if (error)
                        return (error);

                error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done);
                if (error)
                        return (error);

                /* Extract device and label */
                dev = devstr;
                label = strchr(devstr, '@');
                if (label == NULL ||
                    ((label + 1) - dev) > done) {
                        return (EINVAL);
                }
                *label = '\0';
                label++;
                if (*label == '\0')
                        return (EINVAL);

                if (mp->mnt_flag & MNT_UPDATE) {
                        /*
                         * Update mount.  Note that pmp->iroot->cluster is
                         * an inode-embedded cluster and thus cannot be
                         * directly locked.
                         *
                         * XXX HAMMER2 needs to implement NFS export via
                         *     mountctl.
                         */
                        pmp = MPTOPMP(mp);
                        cluster = &pmp->iroot->cluster;
                        for (i = 0; i < cluster->nchains; ++i) {
                                if (cluster->array[i].chain == NULL)
                                        continue;
                                hmp = cluster->array[i].chain->hmp;
                                devvp = hmp->devvp;
                                error = hammer2_remount(hmp, mp, path,
                                                        devvp, cred);
                                if (error)
                                        break;
                        }
                        /*hammer2_inode_install_hidden(pmp);*/

                        return error;
                }
        }

        /*
         * HMP device mount
         *
         * Lookup name and verify it refers to a block device.
         */
        error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
        nlookup_done(&nd);

        if (error == 0) {
                if (vn_isdisk(devvp, &error))
                        error = vfs_mountedon(devvp);
        }

        /*
         * Determine if the device has already been mounted.  After this
         * check hmp will be non-NULL if this is the second or subsequent
         * hammer2 mount from the same device.
         */
        lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
        TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
                if (hmp->devvp == devvp)
                        break;
        }

        /*
         * Open the device if this isn't a secondary mount and construct
         * the H2 device mount (hmp).
         */
        if (hmp == NULL) {
                hammer2_chain_t *schain;
                hammer2_xid_t xid;

                if (error == 0 && vcount(devvp) > 0)
                        error = EBUSY;

                /*
                 * Now open the device
                 */
                if (error == 0) {
                        ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
                        vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
                        error = vinvalbuf(devvp, V_SAVE, 0, 0);
                        if (error == 0) {
                                error = VOP_OPEN(devvp,
                                                 ronly ? FREAD : FREAD | FWRITE,
                                                 FSCRED, NULL);
                        }
                        vn_unlock(devvp);
                }
                if (error && devvp) {
                        vrele(devvp);
                        devvp = NULL;
                }
                if (error) {
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        return error;
                }
                hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO);
                ksnprintf(hmp->devrepname, sizeof(hmp->devrepname), "%s", dev);
                hmp->ronly = ronly;
                hmp->devvp = devvp;
                kmalloc_create(&hmp->mchain, "HAMMER2-chains");
                TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry);
                RB_INIT(&hmp->iotree);
                spin_init(&hmp->io_spin, "hm2mount_io");
                spin_init(&hmp->list_spin, "hm2mount_list");
                TAILQ_INIT(&hmp->flushq);

                lockinit(&hmp->vollk, "h2vol", 0, 0);

                /*
                 * vchain setup. vchain.data is embedded.
                 * vchain.refs is initialized and will never drop to 0.
                 *
                 * NOTE! voldata is not yet loaded.
                 */
                hmp->vchain.hmp = hmp;
                hmp->vchain.refs = 1;
                hmp->vchain.data = (void *)&hmp->voldata;
                hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
                hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
                hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;

                hammer2_chain_core_init(&hmp->vchain);
                /* hmp->vchain.u.xxx is left NULL */

                /*
                 * fchain setup.  fchain.data is embedded.
                 * fchain.refs is initialized and will never drop to 0.
                 *
                 * The data is not used but needs to be initialized to
                 * pass assertion muster.  We use this chain primarily
                 * as a placeholder for the freemap's top-level RBTREE
                 * so it does not interfere with the volume's topology
                 * RBTREE.
                 */
                hmp->fchain.hmp = hmp;
                hmp->fchain.refs = 1;
                hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset;
                hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP;
                hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
                hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid;
                hmp->fchain.bref.methods =
                        HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) |
                        HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);

                hammer2_chain_core_init(&hmp->fchain);
                /* hmp->fchain.u.xxx is left NULL */

                /*
                 * Install the volume header and initialize fields from
                 * voldata.
                 */
                error = hammer2_install_volume_header(hmp);
                if (error) {
                        hammer2_unmount_helper(mp, NULL, hmp);
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return error;
                }

                /*
                 * Really important to get these right or flush will get
                 * confused.
                 */
                hmp->spmp = hammer2_pfsalloc(NULL, NULL, 0);
                kprintf("alloc spmp %p tid %016jx\n",
                        hmp->spmp, hmp->voldata.mirror_tid);
                spmp = hmp->spmp;
                spmp->inode_tid = 1;

                /*
                 * Dummy-up vchain and fchain's modify_tid.  mirror_tid
                 * is inherited from the volume header.
                 */
                xid = 0;
                hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
                hmp->vchain.bref.modify_tid = hmp->vchain.bref.mirror_tid;
                hmp->vchain.pmp = spmp;
                hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid;
                hmp->fchain.bref.modify_tid = hmp->fchain.bref.mirror_tid;
                hmp->fchain.pmp = spmp;

                /*
                 * First locate the super-root inode, which is key 0
                 * relative to the volume header's blockset.
                 *
                 * Then locate the root inode by scanning the directory keyspace
                 * represented by the label.
                 */
                parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
                schain = hammer2_chain_lookup(&parent, &key_dummy,
                                      HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
                                      &cache_index, 0);
                hammer2_chain_lookup_done(parent);
                if (schain == NULL) {
                        kprintf("hammer2_mount: invalid super-root\n");
                        hammer2_unmount_helper(mp, NULL, hmp);
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return EINVAL;
                }
                if (schain->error) {
                        kprintf("hammer2_mount: error %s reading super-root\n",
                                hammer2_error_str(schain->error));
                        hammer2_chain_unlock(schain);
                        hammer2_chain_drop(schain);
                        schain = NULL;
                        hammer2_unmount_helper(mp, NULL, hmp);
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return EINVAL;
                }
                spmp->modify_tid = schain->bref.modify_tid;

                /*
                 * Sanity-check schain's pmp and finish initialization.
                 * Any chain belonging to the super-root topology should
                 * have a NULL pmp (not even set to spmp).
                 */
                ripdata = &hammer2_chain_rdata(schain)->ipdata;
                KKASSERT(schain->pmp == NULL);
                spmp->pfs_clid = ripdata->pfs_clid;

                /*
                 * Replace the dummy spmp->iroot with a real one.  It's
                 * easier to just do a wholesale replacement than to try
                 * to update the chain and fixup the iroot fields.
                 *
                 * The returned inode is locked with the supplied cluster.
                 */
                cluster = hammer2_cluster_from_chain(schain);
                hammer2_inode_drop(spmp->iroot);
                spmp->iroot = NULL;
                spmp->iroot = hammer2_inode_get(spmp, NULL, cluster);
                spmp->spmp_hmp = hmp;
                spmp->pfs_types[0] = ripdata->pfs_type;
                hammer2_inode_ref(spmp->iroot);
                hammer2_inode_unlock(spmp->iroot, cluster);
                schain = NULL;
                /* leave spmp->iroot with one ref */

                if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                        error = hammer2_recovery(hmp);
                        /* XXX do something with error */
                }
                hammer2_update_pmps(hmp);
                hammer2_iocom_init(hmp);

                /*
                 * Ref the cluster management messaging descriptor.  The mount
                 * program deals with the other end of the communications pipe.
                 */
                fp = holdfp(curproc->p_fd, info.cluster_fd, -1);
                if (fp) {
                        hammer2_cluster_reconnect(hmp, fp);
                } else {
                        kprintf("hammer2_mount: bad cluster_fd!\n");
                }
        } else {
                spmp = hmp->spmp;
        }

        /*
         * Lookup the mount point under the media-localized super-root.
         * Scanning hammer2_pfslist doesn't help us because it represents
         * PFS cluster ids which can aggregate several named PFSs together.
         *
         * cluster->pmp will incorrectly point to spmp and must be fixed
         * up later on.
         */
        cparent = hammer2_inode_lock(spmp->iroot, HAMMER2_RESOLVE_ALWAYS);
        lhc = hammer2_dirhash(label, strlen(label));
        cluster = hammer2_cluster_lookup(cparent, &key_next,
                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                      0);
        while (cluster) {
                if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE &&
                    strcmp(label,
                       hammer2_cluster_rdata(cluster)->ipdata.filename) == 0) {
                        break;
                }
                cluster = hammer2_cluster_next(cparent, cluster, &key_next,
                                            key_next,
                                            lhc + HAMMER2_DIRHASH_LOMASK, 0);
        }
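
        /*
         * NOTE: hammer2_dirhash() keys are not unique; name collisions
         *       land in the lhc..lhc+HAMMER2_DIRHASH_LOMASK range, which
         *       is why the loop above iterates the range and compares the
         *       actual filename at each candidate.
         */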
        hammer2_inode_unlock(spmp->iroot, cparent);

        /*
         * PFS could not be found?
         */
        if (cluster == NULL) {
                kprintf("hammer2_mount: PFS label not found\n");
                hammer2_unmount_helper(mp, NULL, hmp);
                lockmgr(&hammer2_mntlk, LK_RELEASE);
                hammer2_vfs_unmount(mp, MNT_FORCE);

                return EINVAL;
        }

        /*
         * Acquire the pmp structure (it should have already been allocated
         * via hammer2_update_pmps() so do not pass cluster in to add to
         * available chains).
         *
         * Check if the cluster has already been mounted.  A cluster can
         * only be mounted once; use null mounts to mount additional copies.
         */
        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
        hammer2_cluster_bref(cluster, &bref);
        pmp = hammer2_pfsalloc(NULL, ripdata, bref.modify_tid);
        hammer2_cluster_unlock(cluster);
        hammer2_cluster_drop(cluster);

        if (pmp->mp) {
                kprintf("hammer2_mount: PFS already mounted!\n");
                hammer2_unmount_helper(mp, NULL, hmp);
                lockmgr(&hammer2_mntlk, LK_RELEASE);
                hammer2_vfs_unmount(mp, MNT_FORCE);

                return EBUSY;
        }

        /*
         * Finish the mount
         */
        kprintf("hammer2_mount hmp=%p pmp=%p\n", hmp, pmp);

        mp->mnt_flag = MNT_LOCAL;
        mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;   /* all entry pts are SMP */
        mp->mnt_kern_flag |= MNTK_THR_SYNC;     /* new vsyncscan semantics */

        /*
         * required mount structure initializations
         */
        mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;

        mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;

        /*
         * Optional fields
         */
        mp->mnt_iosize_max = MAXPHYS;

        /*
         * Connect up mount pointers.
         */
        hammer2_mount_helper(mp, pmp);

        lockmgr(&hammer2_mntlk, LK_RELEASE);

        /*
         * A mounted PFS needs a write thread for logical buffers and
         * a hidden directory for deletions of open files.  These features
         * are not used by unmounted PFSs.
         *
         * The logical file buffer bio write thread handles things like
         * physical block assignment and compression.
         */
        pmp->wthread_destroy = 0;
        lwkt_create(hammer2_write_thread, pmp,
                    &pmp->wthread_td, NULL, 0, -1, "h2pfs-%s", label);

        /*
         * With the cluster operational install ihidden.
         * (only applicable to pfs mounts, not applicable to spmp)
         */
        hammer2_inode_install_hidden(pmp);

        /*
         * Finish setup
         */
        vfs_getnewfsid(mp);
        vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
        vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
        vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);

        copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
        bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
        bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
        copyinstr(path, mp->mnt_stat.f_mntonname,
                  sizeof(mp->mnt_stat.f_mntonname) - 1,
                  &size);

        /*
         * Initial statfs to prime mnt_stat.
         */
        hammer2_vfs_statfs(mp, &mp->mnt_stat, cred);

        return 0;
}

/*
 * Scan PFSs under the super-root and create hammer2_pfs structures.
 */
static
void
hammer2_update_pmps(hammer2_dev_t *hmp)
{
        const hammer2_inode_data_t *ripdata;
        hammer2_cluster_t *cparent;
        hammer2_cluster_t *cluster;
        hammer2_blockref_t bref;
        hammer2_pfs_t *spmp;
        hammer2_pfs_t *pmp;
        hammer2_key_t key_next;

        /*
         * Lookup mount point under the media-localized super-root.
         *
         * cluster->pmp will incorrectly point to spmp and must be fixed
         * up later on.
         */
        spmp = hmp->spmp;
        cparent = hammer2_inode_lock(spmp->iroot, HAMMER2_RESOLVE_ALWAYS);
        cluster = hammer2_cluster_lookup(cparent, &key_next,
                                         HAMMER2_KEY_MIN,
                                         HAMMER2_KEY_MAX,
                                         0);
        while (cluster) {
                /*
                 * Only inodes qualify, but the iterator must advance on
                 * every pass; a bare 'continue' here would spin forever
                 * on a non-inode element.
                 */
                if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
                        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
                        hammer2_cluster_bref(cluster, &bref);
                        kprintf("ADD LOCAL PFS: %s\n", ripdata->filename);

                        pmp = hammer2_pfsalloc(cluster, ripdata,
                                               bref.modify_tid);
                }
                cluster = hammer2_cluster_next(cparent, cluster,
                                               &key_next,
                                               key_next,
                                               HAMMER2_KEY_MAX,
                                               0);
        }
        hammer2_inode_unlock(spmp->iroot, cparent);
}

/*
 * Handle bioq for strategy write
 */
static
void
hammer2_write_thread(void *arg)
{
        hammer2_pfs_t *pmp;
        struct bio *bio;
        struct buf *bp;
        hammer2_trans_t trans;
        struct vnode *vp;
        hammer2_inode_t *ip;
        hammer2_cluster_t *cparent;
        const hammer2_inode_data_t *ripdata;
        hammer2_key_t lbase;
        int lblksize;
        int pblksize;
        int error;

        pmp = arg;

        hammer2_mtx_ex(&pmp->wthread_mtx);
        for (;;) {
                /*
                 * Wait for work.  Break out and destroy the thread only if
                 * requested and no work remains.
                 */
                if (bioq_first(&pmp->wthread_bioq) == NULL) {
                        if (pmp->wthread_destroy)
                                break;
                        mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
                                 0, "h2bioqw", 0);
                        continue;
                }

                /*
                 * Special transaction for logical buffer cache writes.
                 */
                hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE);

                while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
                        /*
                         * Dummy bio for synchronization.  The transaction
                         * must be terminated.
                         */
                        if (bio->bio_buf == NULL) {
                                bio->bio_flags |= BIO_DONE;
                                /* bio will become invalid after DONE set */
                                wakeup(bio);
                                break;
                        }

                        /*
                         * else normal bio processing
                         */
                        hammer2_mtx_unlock(&pmp->wthread_mtx);

                        hammer2_lwinprog_drop(pmp);

                        error = 0;
                        bp = bio->bio_buf;
                        vp = bp->b_vp;
                        ip = VTOI(vp);

                        /*
                         * Inode is modified, flush size and mtime changes
                         * to ensure that the file size remains consistent
                         * with the buffers being flushed.
                         *
                         * NOTE: The inode_fsync() call only flushes the
                         *       inode's meta-data state, it doesn't try
                         *       to flush underlying buffers or chains.
                         *
                         * NOTE: hammer2_write_file_core() may indirectly
                         *       modify and modsync the inode.
                         */
                        cparent = hammer2_inode_lock(ip,
                                                     HAMMER2_RESOLVE_ALWAYS);
                        if (ip->flags & (HAMMER2_INODE_RESIZED |
                                         HAMMER2_INODE_MTIME)) {
                                hammer2_inode_fsync(&trans, ip, cparent);
                        }
                        ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
                        lblksize = hammer2_calc_logical(ip, bio->bio_offset,
                                                        &lbase, NULL);
                        pblksize = hammer2_calc_physical(ip, ripdata, lbase);
                        hammer2_write_file_core(bp, &trans, ip, ripdata,
                                                cparent,
                                                lbase, IO_ASYNC,
                                                pblksize, &error);
                        /* ripdata can be invalid after call */
                        hammer2_inode_unlock(ip, cparent);
                        if (error) {
                                kprintf("hammer2: error in buffer write\n");
                                bp->b_flags |= B_ERROR;
                                bp->b_error = EIO;
                        }
                        biodone(bio);
                        hammer2_mtx_ex(&pmp->wthread_mtx);
                }
                hammer2_trans_done(&trans);
        }
        pmp->wthread_destroy = -1;
        wakeup(&pmp->wthread_destroy);

        hammer2_mtx_unlock(&pmp->wthread_mtx);
}

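/*
 * Wait for all bios queued to the PFS's write thread to complete.
 * A dummy bio with a NULL bio_buf is queued as a barrier; the write
 * thread marks it BIO_DONE and wakes us (see the bio_buf == NULL
 * case in hammer2_write_thread() above).
 */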
void
hammer2_bioq_sync(hammer2_pfs_t *pmp)
{
        struct bio sync_bio;

        bzero(&sync_bio, sizeof(sync_bio));     /* dummy with no bio_buf */
        hammer2_mtx_ex(&pmp->wthread_mtx);
        if (pmp->wthread_destroy == 0 &&
            TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
                bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
                while ((sync_bio.bio_flags & BIO_DONE) == 0)
                        mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0);
        }
        hammer2_mtx_unlock(&pmp->wthread_mtx);
}
1313
1314 /* 
1315  * Return a chain suitable for I/O, creating the chain if necessary
1316  * and assigning its physical block.  The cluster will be in a modified
1317  * state.
1318  *
1319  * cparent can wind up being anything.
1320  *
1321  * NOTE: Special case for data embedded in inode.
1322  */
1323 static
1324 hammer2_cluster_t *
1325 hammer2_assign_physical(hammer2_trans_t *trans,
1326                         hammer2_inode_t *ip, hammer2_cluster_t *cparent,
1327                         hammer2_key_t lbase, int pblksize, int *errorp)
1328 {
1329         hammer2_cluster_t *cluster;
1330         hammer2_cluster_t *dparent;
1331         hammer2_key_t key_dummy;
1332         int pradix = hammer2_getradix(pblksize);
1333
1334         /*
1335          * Locate the chain associated with lbase, return a locked chain.
1336          * However, do not instantiate any data reference (which utilizes a
1337          * device buffer) because we will be using direct IO via the
1338          * logical buffer cache buffer.
1339          */
1340         *errorp = 0;
1341         KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
1342 retry:
1343         dparent = hammer2_cluster_lookup_init(cparent, 0);
1344         cluster = hammer2_cluster_lookup(dparent, &key_dummy,
1345                                      lbase, lbase,
1346                                      HAMMER2_LOOKUP_NODATA);
1347
1348         if (cluster == NULL) {
1349                 /*
1350                  * We found a hole, create a new chain entry.
1351                  *
1352                  * NOTE: DATA chains are created without device backing
1353                  *       store (nor do we want any).
1354                  */
1355                 *errorp = hammer2_cluster_create(trans, dparent, &cluster,
1356                                                lbase, HAMMER2_PBUFRADIX,
1357                                                HAMMER2_BREF_TYPE_DATA,
1358                                                pblksize, 0);
1359                 if (cluster == NULL) {
1360                         hammer2_cluster_lookup_done(dparent);
                        panic("hammer2_cluster_create: par=%p error=%d\n",
                                dparent->focus, *errorp);
                        /* NOT REACHED */
                        goto retry;
1364                 }
1365                 /*ip->delta_dcount += pblksize;*/
1366         } else {
1367                 switch (hammer2_cluster_type(cluster)) {
1368                 case HAMMER2_BREF_TYPE_INODE:
1369                         /*
1370                          * The data is embedded in the inode, which requires
                         * a bit more finesse.
1372                          */
1373                         hammer2_cluster_modify_ip(trans, ip, cluster, 0);
1374                         break;
1375                 case HAMMER2_BREF_TYPE_DATA:
1376                         if (hammer2_cluster_need_resize(cluster, pblksize)) {
1377                                 hammer2_cluster_resize(trans, ip,
1378                                                      dparent, cluster,
1379                                                      pradix,
1380                                                      HAMMER2_MODIFY_OPTDATA);
1381                         }
1382
1383                         /*
1384                          * DATA buffers must be marked modified whether the
1385                          * data is in a logical buffer or not.  We also have
1386                          * to make this call to fixup the chain data pointers
1387                          * after resizing in case this is an encrypted or
1388                          * compressed buffer.
1389                          */
1390                         hammer2_cluster_modify(trans, cluster,
1391                                                HAMMER2_MODIFY_OPTDATA);
1392                         break;
1393                 default:
1394                         panic("hammer2_assign_physical: bad type");
1395                         /* NOT REACHED */
1396                         break;
1397                 }
1398         }
1399
1400         /*
1401          * Cleanup.  If cluster wound up being the inode itself, i.e.
1402          * the DIRECTDATA case for offset 0, then we need to update cparent.
1403          * The caller expects cparent to not become stale.
1404          */
1405         hammer2_cluster_lookup_done(dparent);
1406         /* dparent = NULL; safety */
1407         return (cluster);
1408 }
1409
/*
 * Called for bios queued from hammer2_vnops.c.
 *
 * The core write function, which determines which path to take
 * depending on the compression settings.  We also have to locate the
 * related clusters so we can calculate and set the check data for
 * the blockref.
 */
1418 static
1419 void
1420 hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
1421                         hammer2_inode_t *ip,
1422                         const hammer2_inode_data_t *ripdata,
1423                         hammer2_cluster_t *cparent,
1424                         hammer2_key_t lbase, int ioflag, int pblksize,
1425                         int *errorp)
1426 {
1427         hammer2_cluster_t *cluster;
1428
1429         switch(HAMMER2_DEC_ALGO(ripdata->comp_algo)) {
1430         case HAMMER2_COMP_NONE:
1431                 /*
1432                  * We have to assign physical storage to the buffer
1433                  * we intend to dirty or write now to avoid deadlocks
1434                  * in the strategy code later.
1435                  *
1436                  * This can return NOOFFSET for inode-embedded data.
1437                  * The strategy code will take care of it in that case.
1438                  */
1439                 cluster = hammer2_assign_physical(trans, ip, cparent,
1440                                                 lbase, pblksize,
1441                                                 errorp);
1442                 if (cluster->ddflag) {
1443                         hammer2_inode_data_t *wipdata;
1444
1445                         wipdata = hammer2_cluster_modify_ip(trans, ip,
1446                                                             cluster, 0);
1447                         KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA);
1448                         KKASSERT(bp->b_loffset == 0);
1449                         bcopy(bp->b_data, wipdata->u.data,
1450                               HAMMER2_EMBEDDED_BYTES);
1451                         hammer2_cluster_modsync(cluster);
1452                 } else {
1453                         hammer2_write_bp(cluster, bp, ioflag, pblksize,
1454                                          errorp, ripdata->check_algo);
1455                 }
1456                 /* ripdata can become invalid */
1457                 if (cluster) {
1458                         hammer2_cluster_unlock(cluster);
1459                         hammer2_cluster_drop(cluster);
1460                 }
1461                 break;
1462         case HAMMER2_COMP_AUTOZERO:
1463                 /*
1464                  * Check for zero-fill only
1465                  */
1466                 hammer2_zero_check_and_write(bp, trans, ip,
1467                                     ripdata, cparent, lbase,
1468                                     ioflag, pblksize, errorp,
1469                                     ripdata->check_algo);
1470                 break;
1471         case HAMMER2_COMP_LZ4:
1472         case HAMMER2_COMP_ZLIB:
1473         default:
1474                 /*
1475                  * Check for zero-fill and attempt compression.
1476                  */
1477                 hammer2_compress_and_write(bp, trans, ip,
1478                                            ripdata, cparent,
1479                                            lbase, ioflag,
1480                                            pblksize, errorp,
1481                                            ripdata->comp_algo,
1482                                            ripdata->check_algo);
1483                 break;
1484         }
1485 }
1486
1487 /*
 * Generic function that performs the compression in the compression
 * write path.  The compression algorithm is determined by the settings
 * obtained from the inode.
1491  */
1492 static
1493 void
1494 hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
1495         hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata,
1496         hammer2_cluster_t *cparent,
1497         hammer2_key_t lbase, int ioflag, int pblksize,
1498         int *errorp, int comp_algo, int check_algo)
1499 {
1500         hammer2_cluster_t *cluster;
1501         hammer2_chain_t *chain;
1502         int comp_size;
1503         int comp_block_size;
1504         int i;
1505         char *comp_buffer;
1506
1507         if (test_block_zeros(bp->b_data, pblksize)) {
1508                 zero_write(bp, trans, ip, ripdata, cparent, lbase, errorp);
1509                 return;
1510         }
1511
1512         comp_size = 0;
1513         comp_buffer = NULL;
1514
1515         KKASSERT(pblksize / 2 <= 32768);
1516                 
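        /*
         * Compression heuristic: always attempt compression while the
         * consecutive-failure count is below 8, then back off to retrying
         * only every 8th write.  The counter wraps from >128 back to 8
         * and is reset to 0 on any success (see below).
         */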
1517         if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
1518                 z_stream strm_compress;
1519                 int comp_level;
1520                 int ret;
1521
1522                 switch(HAMMER2_DEC_ALGO(comp_algo)) {
1523                 case HAMMER2_COMP_LZ4:
1524                         comp_buffer = objcache_get(cache_buffer_write,
1525                                                    M_INTWAIT);
1526                         comp_size = LZ4_compress_limitedOutput(
1527                                         bp->b_data,
1528                                         &comp_buffer[sizeof(int)],
1529                                         pblksize,
1530                                         pblksize / 2 - sizeof(int));
1531                         /*
                         * We need to prefix the data with its size; LZ4
                         * doesn't do it for us.  Account for the related
                         * overhead.
1535                          */
1536                         *(int *)comp_buffer = comp_size;
1537                         if (comp_size)
1538                                 comp_size += sizeof(int);
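
                        /*
                         * The resulting media layout is thus a native int
                         * payload size followed by the LZ4 payload.  The
                         * read side is expected to undo this, roughly
                         * (illustrative sketch only, names hypothetical):
                         *
                         *      int clen = *(const int *)bdata;
                         *      LZ4_decompress_safe(bdata + sizeof(int),
                         *          obuf, clen, pblksize);
                         */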
1539                         break;
1540                 case HAMMER2_COMP_ZLIB:
                        comp_level = HAMMER2_DEC_LEVEL(comp_algo);
                        if (comp_level < 6)     /* unspecified (0) or low */
                                comp_level = 6; /* default zlib compression */
                        else if (comp_level > 9)
                                comp_level = 9;
                        ret = deflateInit(&strm_compress, comp_level);
                        if (ret != Z_OK) {
                                kprintf("HAMMER2 ZLIB: fatal error "
                                        "on deflateInit.\n");
                                break;  /* write the block uncompressed */
                        }
1553
1554                         comp_buffer = objcache_get(cache_buffer_write,
1555                                                    M_INTWAIT);
1556                         strm_compress.next_in = bp->b_data;
1557                         strm_compress.avail_in = pblksize;
1558                         strm_compress.next_out = comp_buffer;
1559                         strm_compress.avail_out = pblksize / 2;
1560                         ret = deflate(&strm_compress, Z_FINISH);
1561                         if (ret == Z_STREAM_END) {
1562                                 comp_size = pblksize / 2 -
1563                                             strm_compress.avail_out;
1564                         } else {
1565                                 comp_size = 0;
1566                         }
1567                         ret = deflateEnd(&strm_compress);
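
                        /*
                         * NOTE: Unlike LZ4, no size prefix is stored; a
                         *       zlib stream is self-terminating, so the
                         *       read side can simply inflate() until it
                         *       sees Z_STREAM_END.
                         */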
1568                         break;
                default:
                        kprintf("Error: unknown compression method %d\n",
                                comp_algo);
                        break;
1573                 }
1574         }
1575
1576         if (comp_size == 0) {
1577                 /*
1578                  * compression failed or turned off
1579                  */
1580                 comp_block_size = pblksize;     /* safety */
1581                 if (++ip->comp_heuristic > 128)
1582                         ip->comp_heuristic = 8;
1583         } else {
1584                 /*
1585                  * compression succeeded
1586                  */
1587                 ip->comp_heuristic = 0;
1588                 if (comp_size <= 1024) {
1589                         comp_block_size = 1024;
1590                 } else if (comp_size <= 2048) {
1591                         comp_block_size = 2048;
1592                 } else if (comp_size <= 4096) {
1593                         comp_block_size = 4096;
1594                 } else if (comp_size <= 8192) {
1595                         comp_block_size = 8192;
1596                 } else if (comp_size <= 16384) {
1597                         comp_block_size = 16384;
1598                 } else if (comp_size <= 32768) {
1599                         comp_block_size = 32768;
1600                 } else {
1601                         panic("hammer2: WRITE PATH: "
1602                               "Weird comp_size value.");
1603                         /* NOT REACHED */
1604                         comp_block_size = pblksize;
1605                 }
1606         }
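
        /*
         * The ladder above rounds comp_size up to the next power of 2
         * between 1KB and 32KB, i.e. the smallest allocatable physical
         * block size able to hold the compressed payload.
         */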
1607
1608         cluster = hammer2_assign_physical(trans, ip, cparent,
1609                                           lbase, comp_block_size,
1610                                           errorp);
1611         ripdata = NULL;
1612
1613         if (*errorp) {
1614                 kprintf("WRITE PATH: An error occurred while "
1615                         "assigning physical space.\n");
1616                 KKASSERT(cluster == NULL);
1617                 goto done;
1618         }
1619
1620         if (cluster->ddflag) {
1621                 hammer2_inode_data_t *wipdata;
1622
1623                 wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1624                 KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA);
1625                 KKASSERT(bp->b_loffset == 0);
1626                 bcopy(bp->b_data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
1627                 hammer2_cluster_modsync(cluster);
1628         } else
1629         for (i = 0; i < cluster->nchains; ++i) {
1630                 hammer2_io_t *dio;
1631                 char *bdata;
1632
                /* XXX hack */
1634
1635                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
1636                         continue;
1637                 chain = cluster->array[i].chain;        /* XXX */
1638                 if (chain == NULL)
1639                         continue;
1640                 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
1641
1642                 switch(chain->bref.type) {
1643                 case HAMMER2_BREF_TYPE_INODE:
                        panic("hammer2_compress_and_write: "
                              "unexpected inode\n");
1645                         break;
1646                 case HAMMER2_BREF_TYPE_DATA:
1647                         /*
1648                          * Optimize out the read-before-write
1649                          * if possible.
1650                          */
1651                         *errorp = hammer2_io_newnz(chain->hmp,
1652                                                    chain->bref.data_off,
1653                                                    chain->bytes,
1654                                                    &dio);
1655                         if (*errorp) {
1656                                 hammer2_io_brelse(&dio);
1657                                 kprintf("hammer2: WRITE PATH: "
1658                                         "dbp bread error\n");
1659                                 break;
1660                         }
1661                         bdata = hammer2_io_data(dio, chain->bref.data_off);
1662
1663                         /*
1664                          * When loading the block make sure we don't
1665                          * leave garbage after the compressed data.
1666                          */
1667                         if (comp_size) {
1668                                 chain->bref.methods =
1669                                         HAMMER2_ENC_COMP(comp_algo) +
1670                                         HAMMER2_ENC_CHECK(check_algo);
1671                                 bcopy(comp_buffer, bdata, comp_size);
1672                                 if (comp_size != comp_block_size) {
1673                                         bzero(bdata + comp_size,
1674                                               comp_block_size - comp_size);
1675                                 }
1676                         } else {
1677                                 chain->bref.methods =
1678                                         HAMMER2_ENC_COMP(
1679                                                 HAMMER2_COMP_NONE) +
1680                                         HAMMER2_ENC_CHECK(check_algo);
1681                                 bcopy(bp->b_data, bdata, pblksize);
1682                         }
1683
1684                         /*
1685                          * The flush code doesn't calculate check codes for
1686                          * file data (doing so can result in excessive I/O),
1687                          * so we do it here.
1688                          */
1689                         hammer2_chain_setcheck(chain, bdata);
1690
1691                         /*
1692                          * Device buffer is now valid, chain is no longer in
1693                          * the initial state.
1694                          *
1695                          * (No blockref table worries with file data)
1696                          */
1697                         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1698
1699                         /* Now write the related bdp. */
1700                         if (ioflag & IO_SYNC) {
1701                                 /*
1702                                  * Synchronous I/O requested.
1703                                  */
1704                                 hammer2_io_bwrite(&dio);
1705                         /*
1706                         } else if ((ioflag & IO_DIRECT) &&
1707                                    loff + n == pblksize) {
1708                                 hammer2_io_bdwrite(&dio);
1709                         */
1710                         } else if (ioflag & IO_ASYNC) {
1711                                 hammer2_io_bawrite(&dio);
1712                         } else {
1713                                 hammer2_io_bdwrite(&dio);
1714                         }
1715                         break;
1716                 default:
                        panic("hammer2_compress_and_write: "
                              "bad chain type %d\n", chain->bref.type);
1719                         /* NOT REACHED */
1720                         break;
1721                 }
1722         }
1723 done:
1724         if (cluster) {
1725                 hammer2_cluster_unlock(cluster);
1726                 hammer2_cluster_drop(cluster);
1727         }
1728         if (comp_buffer)
1729                 objcache_put(cache_buffer_write, comp_buffer);
1730 }
1731
1732 /*
 * Function that performs zero-checking and writing without compression;
 * it corresponds to the default zero-checking path.
1735  */
1736 static
1737 void
1738 hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
1739         hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata,
1740         hammer2_cluster_t *cparent,
1741         hammer2_key_t lbase, int ioflag, int pblksize, int *errorp,
1742         int check_algo)
1743 {
1744         hammer2_cluster_t *cluster;
1745
1746         if (test_block_zeros(bp->b_data, pblksize)) {
1747                 zero_write(bp, trans, ip, ripdata, cparent, lbase, errorp);
1748                 /* ripdata can become invalid */
1749         } else {
1750                 cluster = hammer2_assign_physical(trans, ip, cparent,
1751                                                   lbase, pblksize, errorp);
1752                 hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp,
1753                                  check_algo);
1754                 /* ripdata can become invalid */
1755                 if (cluster) {
1756                         hammer2_cluster_unlock(cluster);
1757                         hammer2_cluster_drop(cluster);
1758                 }
1759         }
1760 }
1761
1762 /*
 * Test whether a block of data contains only zeros.  Returns TRUE
 * (non-zero) if the block is all zeros.
1765  */
1766 static
1767 int
1768 test_block_zeros(const char *buf, size_t bytes)
1769 {
1770         size_t i;
1771
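        /*
         * Scan a long at a time.  This assumes bytes is a multiple of
         * sizeof(long) and that buf is suitably aligned, which holds for
         * the power-of-2 block sizes and buffer data used here.
         */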
1772         for (i = 0; i < bytes; i += sizeof(long)) {
1773                 if (*(const long *)(buf + i) != 0)
1774                         return (0);
1775         }
1776         return (1);
1777 }
1778
1779 /*
1780  * Function to "write" a block that contains only zeros.
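 *
 * A zero block is represented by deleting any existing data block,
 * leaving a hole, rather than by allocating storage.  Data embedded
 * directly in the inode is simply zeroed in place.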
1781  */
1782 static
1783 void
1784 zero_write(struct buf *bp, hammer2_trans_t *trans,
1785            hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata,
1786            hammer2_cluster_t *cparent,
1787            hammer2_key_t lbase, int *errorp __unused)
1788 {
1789         hammer2_cluster_t *cluster;
1790         hammer2_key_t key_dummy;
1791
1792         cparent = hammer2_cluster_lookup_init(cparent, 0);
1793         cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
1794                                      HAMMER2_LOOKUP_NODATA);
1795         if (cluster) {
1796                 if (cluster->ddflag) {
1797                         hammer2_inode_data_t *wipdata;
1798
1799                         wipdata = hammer2_cluster_modify_ip(trans, ip,
1800                                                             cluster, 0);
1801                         KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA);
1802                         KKASSERT(bp->b_loffset == 0);
1803                         bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
1804                         hammer2_cluster_modsync(cluster);
1805                 } else {
1806                         hammer2_cluster_delete(trans, cparent, cluster,
1807                                                HAMMER2_DELETE_PERMANENT);
1808                 }
1809                 hammer2_cluster_unlock(cluster);
1810                 hammer2_cluster_drop(cluster);
1811         }
1812         hammer2_cluster_lookup_done(cparent);
1813 }
1814
1815 /*
 * Write the data as-is, without performing any sort of compression.
 * This function is used in the no-compression path and in the default
 * zero-checking path.
1819  */
1820 static
1821 void
1822 hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
1823                                 int pblksize, int *errorp, int check_algo)
1824 {
1825         hammer2_chain_t *chain;
1826         hammer2_inode_data_t *wipdata;
1827         hammer2_io_t *dio;
1828         char *bdata;
1829         int error;
1830         int i;
1831
1832         error = 0;      /* XXX TODO below */
1833
1834         for (i = 0; i < cluster->nchains; ++i) {
1835                 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
1836                         continue;
1837                 chain = cluster->array[i].chain;        /* XXX */
1838                 if (chain == NULL)
1839                         continue;
1840                 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
1841
1842                 switch(chain->bref.type) {
1843                 case HAMMER2_BREF_TYPE_INODE:
1844                         wipdata = &hammer2_chain_wdata(chain)->ipdata;
1845                         KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA);
1846                         KKASSERT(bp->b_loffset == 0);
1847                         bcopy(bp->b_data, wipdata->u.data,
1848                               HAMMER2_EMBEDDED_BYTES);
1849                         error = 0;
1850                         break;
1851                 case HAMMER2_BREF_TYPE_DATA:
1852                         error = hammer2_io_newnz(chain->hmp,
1853                                                  chain->bref.data_off,
1854                                                  chain->bytes, &dio);
1855                         if (error) {
1856                                 hammer2_io_bqrelse(&dio);
1857                                 kprintf("hammer2: WRITE PATH: "
1858                                         "dbp bread error\n");
1859                                 break;
1860                         }
1861                         bdata = hammer2_io_data(dio, chain->bref.data_off);
1862
1863                         chain->bref.methods = HAMMER2_ENC_COMP(
1864                                                         HAMMER2_COMP_NONE) +
1865                                               HAMMER2_ENC_CHECK(check_algo);
1866                         bcopy(bp->b_data, bdata, chain->bytes);
1867
1868                         /*
1869                          * The flush code doesn't calculate check codes for
1870                          * file data (doing so can result in excessive I/O),
1871                          * so we do it here.
1872                          */
1873                         hammer2_chain_setcheck(chain, bdata);
1874
1875                         /*
1876                          * Device buffer is now valid, chain is no longer in
1877                          * the initial state.
1878                          *
1879                          * (No blockref table worries with file data)
1880                          */
1881                         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1882
1883                         if (ioflag & IO_SYNC) {
1884                                 /*
1885                                  * Synchronous I/O requested.
1886                                  */
1887                                 hammer2_io_bwrite(&dio);
1888                         /*
1889                         } else if ((ioflag & IO_DIRECT) &&
1890                                    loff + n == pblksize) {
1891                                 hammer2_io_bdwrite(&dio);
1892                         */
1893                         } else if (ioflag & IO_ASYNC) {
1894                                 hammer2_io_bawrite(&dio);
1895                         } else {
1896                                 hammer2_io_bdwrite(&dio);
1897                         }
1898                         break;
1899                 default:
1900                         panic("hammer2_write_bp: bad chain type %d\n",
1901                               chain->bref.type);
1902                         /* NOT REACHED */
1903                         error = 0;
1904                         break;
1905                 }
1906                 KKASSERT(error == 0);   /* XXX TODO */
1907         }
1908         *errorp = error;
1909 }
1910
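/*
 * Remount support.  Only the read-only -> read-write transition requires
 * real work here: mount-time recovery is run to bring the freemap up to
 * date with the last committed transaction.  The remaining arguments are
 * currently unused.
 */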
1911 static
1912 int
1913 hammer2_remount(hammer2_dev_t *hmp, struct mount *mp, char *path,
1914                 struct vnode *devvp, struct ucred *cred)
1915 {
1916         int error;
1917
1918         if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
1919                 error = hammer2_recovery(hmp);
1920         } else {
1921                 error = 0;
1922         }
1923         return error;
1924 }
1925
1926 static
1927 int
1928 hammer2_vfs_unmount(struct mount *mp, int mntflags)
1929 {
1930         hammer2_pfs_t *pmp;
1931         int flags;
1932         int error = 0;
1933
1934         pmp = MPTOPMP(mp);
1935
1936         if (pmp == NULL)
1937                 return(0);
1938
1939         lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
1940
1941         /*
1942          * If mount initialization proceeded far enough we must flush
1943          * its vnodes and sync the underlying mount points.  Three syncs
1944          * are required to fully flush the filesystem (freemap updates lag
1945          * by one flush, and one extra for safety).
1946          */
1947         if (mntflags & MNT_FORCE)
1948                 flags = FORCECLOSE;
1949         else
1950                 flags = 0;
1951         if (pmp->iroot) {
1952                 error = vflush(mp, 0, flags);
1953                 if (error)
1954                         goto failed;
1955                 hammer2_vfs_sync(mp, MNT_WAIT);
1956                 hammer2_vfs_sync(mp, MNT_WAIT);
1957                 hammer2_vfs_sync(mp, MNT_WAIT);
1958         }
1959
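        /*
         * Shut down the strategy write thread.  Setting wthread_destroy
         * to 1 asks the thread to exit; it acknowledges by setting the
         * field to -1 on its way out, at which point we can safely clear
         * wthread_td.
         */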
1960         if (pmp->wthread_td) {
1961                 hammer2_mtx_ex(&pmp->wthread_mtx);
1962                 pmp->wthread_destroy = 1;
1963                 wakeup(&pmp->wthread_bioq);
1964                 while (pmp->wthread_destroy != -1) {
1965                         mtxsleep(&pmp->wthread_destroy,
1966                                 &pmp->wthread_mtx, 0,
1967                                 "umount-sleep", 0);
1968                 }
1969                 hammer2_mtx_unlock(&pmp->wthread_mtx);
1970                 pmp->wthread_td = NULL;
1971         }
1972
1973         /*
         * Clean up our reference on ihidden.
1975          */
1976         if (pmp->ihidden) {
1977                 hammer2_inode_drop(pmp->ihidden);
1978                 pmp->ihidden = NULL;
1979         }
1980         if (pmp->mp)
1981                 hammer2_unmount_helper(mp, pmp, NULL);
1982
1983         error = 0;
1984 failed:
1985         lockmgr(&hammer2_mntlk, LK_RELEASE);
1986
1987         return (error);
1988 }
1989
1990 /*
1991  * Mount helper, hook the system mount into our PFS.
1992  * The mount lock is held.
1993  *
1994  * We must bump the mount_count on related devices for any
1995  * mounted PFSs.
1996  */
1997 static
1998 void
1999 hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp)
2000 {
2001         hammer2_cluster_t *cluster;
2002         hammer2_chain_t *rchain;
2003         int i;
2004
2005         mp->mnt_data = (qaddr_t)pmp;
2006         pmp->mp = mp;
2007
2008         /*
2009          * After pmp->mp is set we have to adjust hmp->mount_count.
2010          */
2011         cluster = &pmp->iroot->cluster;
2012         for (i = 0; i < cluster->nchains; ++i) {
2013                 rchain = cluster->array[i].chain;
2014                 if (rchain == NULL)
2015                         continue;
2016                 ++rchain->hmp->mount_count;
2017                 kprintf("hammer2_mount hmp=%p ++mount_count=%d\n",
2018                         rchain->hmp, rchain->hmp->mount_count);
2019         }
2020 }
2021
2022 /*
2023  * Mount helper, unhook the system mount from our PFS.
2024  * The mount lock is held.
2025  *
 * If hmp is supplied, the mount responsible for being the first to open
 * the block device failed and the block device and all PFSs using the
 * block device must be cleaned up.
 *
 * If pmp is supplied, multiple devices might be backing the PFS and each
 * must be disconnected.  This might not be the last PFS using some of the
 * underlying devices.  Also, we have to adjust our hmp->mount_count
 * accounting for the devices backing the pmp, which is now undergoing an
 * unmount.
2035  */
2036 static
2037 void
2038 hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp, hammer2_dev_t *hmp)
2039 {
2040         hammer2_cluster_t *cluster;
2041         hammer2_chain_t *rchain;
2042         struct vnode *devvp;
2043         int dumpcnt;
2044         int ronly = 0;
2045         int i;
2046
2047         /*
         * If no device is supplied this is a high-level unmount and we have
         * to disconnect the mount, adjust mount_count, and locate devices
2050          * that might now have no mounts.
2051          */
2052         if (pmp) {
2053                 KKASSERT(hmp == NULL);
2054                 KKASSERT((void *)(intptr_t)mp->mnt_data == pmp);
2055                 pmp->mp = NULL;
2056                 mp->mnt_data = NULL;
2057
2058                 /*
2059                  * After pmp->mp is cleared we have to account for
2060                  * mount_count.
2061                  */
2062                 cluster = &pmp->iroot->cluster;
2063                 for (i = 0; i < cluster->nchains; ++i) {
2064                         rchain = cluster->array[i].chain;
2065                         if (rchain == NULL)
2066                                 continue;
2067                         --rchain->hmp->mount_count;
2068                         kprintf("hammer2_unmount hmp=%p --mount_count=%d\n",
2069                                 rchain->hmp, rchain->hmp->mount_count);
2070                         /* scrapping hmp now may invalidate the pmp */
2071                 }
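
                /*
                 * Scrap any hmp which no longer has mounted PFSs.  The
                 * recursive call removes the hmp from hammer2_mntlist,
                 * so restart the scan from the head each time.
                 */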
2072 again:
2073                 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
2074                         if (hmp->mount_count == 0) {
2075                                 hammer2_unmount_helper(NULL, NULL, hmp);
2076                                 goto again;
2077                         }
2078                 }
2079                 return;
2080         }
2081
2082         /*
2083          * Try to terminate the block device.  We can't terminate it if
2084          * there are still PFSs referencing it.
2085          */
2086         kprintf("hammer2_unmount hmp=%p mount_count=%d\n",
2087                 hmp, hmp->mount_count);
2088         if (hmp->mount_count)
2089                 return;
2090
2091         hammer2_pfsfree_scan(hmp);
2092         hammer2_dev_exlock(hmp);        /* XXX order */
2093
2094         /*
2095          * Cycle the volume data lock as a safety (probably not needed any
2096          * more).  To ensure everything is out we need to flush at least
2097          * three times.  (1) The running of the unlinkq can dirty the
         * filesystem, (2) a normal flush can dirty the freemap, and
         * (3) a final flush ensures that the freemap is fully synchronized.
2100          *
2101          * The next mount's recovery scan can clean everything up but we want
2102          * to leave the filesystem in a 100% clean state on a normal unmount.
2103          */
2104 #if 0
2105         hammer2_voldata_lock(hmp);
2106         hammer2_voldata_unlock(hmp);
2107 #endif
2108         hammer2_iocom_uninit(hmp);
2109
2110         if ((hmp->vchain.flags | hmp->fchain.flags) &
2111             HAMMER2_CHAIN_FLUSH_MASK) {
2112                 kprintf("hammer2_unmount: chains left over "
2113                         "after final sync\n");
2114                 kprintf("    vchain %08x\n", hmp->vchain.flags);
2115                 kprintf("    fchain %08x\n", hmp->fchain.flags);
2116
2117                 if (hammer2_debug & 0x0010)
2118                         Debugger("entered debugger");
2119         }
2120
2121         KKASSERT(hmp->spmp == NULL);
2122
2123         /*
2124          * Finish up with the device vnode
2125          */
2126         if ((devvp = hmp->devvp) != NULL) {
2127                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
2128                 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
2129                 hmp->devvp = NULL;
2130                 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
2131                 vn_unlock(devvp);
2132                 vrele(devvp);
2133                 devvp = NULL;
2134         }
2135
2136         /*
2137          * Clear vchain/fchain flags that might prevent final cleanup
2138          * of these chains.
2139          */
2140         if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) {
2141                 atomic_clear_int(&hmp->vchain.flags,
2142                                  HAMMER2_CHAIN_MODIFIED);
2143                 hammer2_pfs_memory_wakeup(hmp->vchain.pmp);
2144                 hammer2_chain_drop(&hmp->vchain);
2145         }
2146         if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) {
2147                 atomic_clear_int(&hmp->vchain.flags,
2148                                  HAMMER2_CHAIN_UPDATE);
2149                 hammer2_chain_drop(&hmp->vchain);
2150         }
2151
2152         if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) {
2153                 atomic_clear_int(&hmp->fchain.flags,
2154                                  HAMMER2_CHAIN_MODIFIED);
2155                 hammer2_pfs_memory_wakeup(hmp->fchain.pmp);
2156                 hammer2_chain_drop(&hmp->fchain);
2157         }
2158         if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) {
2159                 atomic_clear_int(&hmp->fchain.flags,
2160                                  HAMMER2_CHAIN_UPDATE);
2161                 hammer2_chain_drop(&hmp->fchain);
2162         }
2163
2164         /*
2165          * Final drop of embedded freemap root chain to
2166          * clean up fchain.core (fchain structure is not
2167          * flagged ALLOCATED so it is cleaned out and then
2168          * left to rot).
2169          */
2170         hammer2_chain_drop(&hmp->fchain);
2171
2172         /*
2173          * Final drop of embedded volume root chain to clean
2174          * up vchain.core (vchain structure is not flagged
2175          * ALLOCATED so it is cleaned out and then left to
2176          * rot).
2177          */
2178         dumpcnt = 50;
2179         hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
2180         dumpcnt = 50;
2181         hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f');
2182         hammer2_dev_unlock(hmp);
2183         hammer2_chain_drop(&hmp->vchain);
2184
2185         hammer2_io_cleanup(hmp, &hmp->iotree);
2186         if (hmp->iofree_count) {
2187                 kprintf("io_cleanup: %d I/O's left hanging\n",
2188                         hmp->iofree_count);
2189         }
2190
2191         TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
2192         kmalloc_destroy(&hmp->mchain);
2193         kfree(hmp, M_HAMMER2);
2194 }
2195
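/*
 * Inode-number based vnode lookup (needed e.g. for NFS exporting) is
 * not currently implemented.
 */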
2196 static
2197 int
2198 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
2199              ino_t ino, struct vnode **vpp)
2200 {
2201         kprintf("hammer2_vget\n");
2202         return (EOPNOTSUPP);
2203 }
2204
2205 static
2206 int
2207 hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
2208 {
2209         hammer2_pfs_t *pmp;
2210         hammer2_cluster_t *cparent;
2211         int error;
2212         struct vnode *vp;
2213
2214         pmp = MPTOPMP(mp);
2215         if (pmp->iroot == NULL) {
2216                 *vpp = NULL;
2217                 error = EINVAL;
2218         } else {
2219                 cparent = hammer2_inode_lock(pmp->iroot,
2220                                                 HAMMER2_RESOLVE_ALWAYS |
2221                                                 HAMMER2_RESOLVE_SHARED);
2222                 vp = hammer2_igetv(pmp->iroot, cparent, &error);
2223                 hammer2_inode_unlock(pmp->iroot, cparent);
2224                 *vpp = vp;
2225                 if (vp == NULL)
2226                         kprintf("vnodefail\n");
2227         }
2228
2229         return (error);
2230 }
2231
2232 /*
2233  * Filesystem status
2234  *
2235  * XXX incorporate ipdata->inode_quota and data_quota
2236  */
2237 static
2238 int
2239 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
2240 {
2241         hammer2_pfs_t *pmp;
2242         hammer2_dev_t *hmp;
2243
2244         pmp = MPTOPMP(mp);
2245         KKASSERT(pmp->iroot->cluster.nchains >= 1);
2246         hmp = pmp->iroot->cluster.focus->hmp;   /* XXX */
2247
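        /*
         * Block counts are reported in HAMMER2_PBUFSIZE (64KB) units.
         * For example, a 1TB volume reports f_blocks = 2^40 / 2^16 =
         * 16M blocks.
         */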
2248         mp->mnt_stat.f_files = pmp->inode_count;
2249         mp->mnt_stat.f_ffree = 0;
2250         mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
2251         mp->mnt_stat.f_bfree =  hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
2252         mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
2253
2254         *sbp = mp->mnt_stat;
2255         return (0);
2256 }
2257
2258 static
2259 int
2260 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
2261 {
2262         hammer2_pfs_t *pmp;
2263         hammer2_dev_t *hmp;
2264
2265         pmp = MPTOPMP(mp);
2266         KKASSERT(pmp->iroot->cluster.nchains >= 1);
2267         hmp = pmp->iroot->cluster.focus->hmp;   /* XXX */
2268
2269         mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
2270         mp->mnt_vstat.f_files = pmp->inode_count;
2271         mp->mnt_vstat.f_ffree = 0;
2272         mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
2273         mp->mnt_vstat.f_bfree =  hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
2274         mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;
2275
2276         *sbp = mp->mnt_vstat;
2277         return (0);
2278 }
2279
2280 /*
2281  * Mount-time recovery (RW mounts)
2282  *
2283  * Updates to the free block table are allowed to lag flushes by one
 * transaction.  After a crash we must therefore, on a fresh mount, do an
 * incremental scan of the last committed transaction id and make sure that
 * all related blocks have been marked allocated.
2287  *
2288  * The super-root topology and each PFS has its own transaction id domain,
2289  * so we must track PFS boundary transitions.
2290  */
2291 struct hammer2_recovery_elm {
2292         TAILQ_ENTRY(hammer2_recovery_elm) entry;
2293         hammer2_chain_t *chain;
2294         hammer2_tid_t sync_tid;
2295 };
2296
2297 TAILQ_HEAD(hammer2_recovery_list, hammer2_recovery_elm);
2298
2299 struct hammer2_recovery_info {
2300         struct hammer2_recovery_list list;
2301         int     depth;
2302 };
2303
2304 static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_dev_t *hmp,
2305                         hammer2_chain_t *parent,
2306                         struct hammer2_recovery_info *info,
2307                         hammer2_tid_t sync_tid);
2308
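/*
 * Limit recursion during the recovery scan.  Chains encountered beyond
 * this depth are queued on the recovery list and processed iteratively
 * from hammer2_recovery() instead, bounding kernel stack use on deep
 * topologies.
 */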
2309 #define HAMMER2_RECOVERY_MAXDEPTH       10
2310
2311 static
2312 int
2313 hammer2_recovery(hammer2_dev_t *hmp)
2314 {
2315         hammer2_trans_t trans;
2316         struct hammer2_recovery_info info;
2317         struct hammer2_recovery_elm *elm;
2318         hammer2_chain_t *parent;
2319         hammer2_tid_t sync_tid;
2320         hammer2_tid_t mirror_tid;
2321         int error;
2322         int cumulative_error = 0;
2323
2324         hammer2_trans_init(&trans, hmp->spmp, 0);
2325
2326         sync_tid = hmp->voldata.freemap_tid;
2327         mirror_tid = hmp->voldata.mirror_tid;
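
        /*
         * The freemap is allowed to lag the topology by one flush; any
         * chain whose bref.mirror_tid is greater than the on-media
         * freemap_tid must have its blocks re-marked allocated.  The
         * recovery range is thus (freemap_tid, mirror_tid].
         */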
2328
2329         kprintf("hammer2 mount \"%s\": ", hmp->devrepname);
        if (sync_tid >= mirror_tid) {
                kprintf("no recovery needed\n");
        } else {
                kprintf("freemap recovery %016jx-%016jx\n",
                        sync_tid + 1, mirror_tid);
        }
2336
2337         TAILQ_INIT(&info.list);
2338         info.depth = 0;
2339         parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
2340         cumulative_error = hammer2_recovery_scan(&trans, hmp, parent,
2341                                                  &info, sync_tid);
2342         hammer2_chain_lookup_done(parent);
2343
2344         while ((elm = TAILQ_FIRST(&info.list)) != NULL) {
2345                 TAILQ_REMOVE(&info.list, elm, entry);
2346                 parent = elm->chain;
2347                 sync_tid = elm->sync_tid;
2348                 kfree(elm, M_HAMMER2);
2349
2350                 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
2351                 error = hammer2_recovery_scan(&trans, hmp, parent,
2352                                               &info,
2353                                               hmp->voldata.freemap_tid);
2354                 hammer2_chain_unlock(parent);
2355                 hammer2_chain_drop(parent);     /* drop elm->chain ref */
2356                 if (error)
2357                         cumulative_error = error;
2358         }
2359         hammer2_trans_done(&trans);
2360
2361         return cumulative_error;
2362 }
2363
2364 static
2365 int
2366 hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_dev_t *hmp,
2367                       hammer2_chain_t *parent,
2368                       struct hammer2_recovery_info *info,
2369                       hammer2_tid_t sync_tid)
2370 {
2371         const hammer2_inode_data_t *ripdata;
2372         hammer2_chain_t *chain;
2373         int cache_index;
2374         int cumulative_error = 0;
2375         int error;
2376
2377         /*
2378          * Adjust freemap to ensure that the block(s) are marked allocated.
2379          */
2380         if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) {
2381                 hammer2_freemap_adjust(trans, hmp, &parent->bref,
2382                                        HAMMER2_FREEMAP_DORECOVER);
2383         }
2384
2385         /*
2386          * Check type for recursive scan
2387          */
2388         switch(parent->bref.type) {
2389         case HAMMER2_BREF_TYPE_VOLUME:
2390                 /* data already instantiated */
2391                 break;
2392         case HAMMER2_BREF_TYPE_INODE:
2393                 /*
2394                  * Must instantiate data for DIRECTDATA test and also
2395                  * for recursion.
2396                  */
2397                 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
2398                 ripdata = &hammer2_chain_rdata(parent)->ipdata;
2399                 if (ripdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
2400                         /* not applicable to recovery scan */
2401                         hammer2_chain_unlock(parent);
2402                         return 0;
2403                 }
2404                 hammer2_chain_unlock(parent);
2405                 break;
2406         case HAMMER2_BREF_TYPE_INDIRECT:
2407                 /*
2408                  * Must instantiate data for recursion
2409                  */
2410                 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
2411                 hammer2_chain_unlock(parent);
2412                 break;
2413         case HAMMER2_BREF_TYPE_DATA:
2414         case HAMMER2_BREF_TYPE_FREEMAP:
2415         case HAMMER2_BREF_TYPE_FREEMAP_NODE:
2416         case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
2417                 /* not applicable to recovery scan */
                return 0;
2420         default:
2421                 return EDOM;
2422         }
2423
2424         /*
2425          * Defer operation if depth limit reached or if we are crossing a
2426          * PFS boundary.
2427          */
2428         if (info->depth >= HAMMER2_RECOVERY_MAXDEPTH) {
2429                 struct hammer2_recovery_elm *elm;
2430
2431                 elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK);
2432                 elm->chain = parent;
2433                 elm->sync_tid = sync_tid;
2434                 hammer2_chain_ref(parent);
2435                 TAILQ_INSERT_TAIL(&info->list, elm, entry);
2436                 /* unlocked by caller */
2437
2438                 return(0);
        }

2442         /*
2443          * Recursive scan of the last flushed transaction only.  We are
2444          * doing this without pmp assignments so don't leave the chains
2445          * hanging around after we are done with them.
2446          */
2447         cache_index = 0;
2448         chain = hammer2_chain_scan(parent, NULL, &cache_index,
2449                                    HAMMER2_LOOKUP_NODATA);
2450         while (chain) {
2451                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
2452                 if (chain->bref.mirror_tid > sync_tid) {
2453                         ++info->depth;
2454                         error = hammer2_recovery_scan(trans, hmp, chain,
2455                                                       info, sync_tid);
2456                         --info->depth;
2457                         if (error)
2458                                 cumulative_error = error;
2459                 }
2460
2461                 /*
2462                  * Flush the recovery at the PFS boundary to stage it for
2463                  * the final flush of the super-root topology.
2464                  */
2465                 if ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
2466                     (chain->flags & HAMMER2_CHAIN_ONFLUSH)) {
2467                         hammer2_flush(trans, chain, 1);
2468                 }
2469                 chain = hammer2_chain_scan(parent, chain, &cache_index,
2470                                            HAMMER2_LOOKUP_NODATA);
2471         }
2472
2473         return cumulative_error;
2474 }
2475
2476 /*
2477  * Sync a mount point; this is called on a per-mount basis from the
2478  * filesystem syncer process periodically and whenever a user issues
2479  * a sync.
2480  */
2481 int
2482 hammer2_vfs_sync(struct mount *mp, int waitfor)
2483 {
2484         struct hammer2_sync_info info;
2485         hammer2_inode_t *iroot;
2486         hammer2_chain_t *chain;
2487         hammer2_chain_t *parent;
2488         hammer2_pfs_t *pmp;
2489         hammer2_dev_t *hmp;
2490         int flags;
2491         int error;
2492         int total_error;
2493         int i;
2494         int j;
2495
2496         pmp = MPTOPMP(mp);
2497         iroot = pmp->iroot;
2498         KKASSERT(iroot);
2499         KKASSERT(iroot->pmp == pmp);
2500
2501         /*
2502          * We can't acquire locks on existing vnodes while in a transaction
2503          * without risking a deadlock.  This assumes that vfsync() can be
2504          * called without the vnode locked (which it can in DragonFly).
2505          * Otherwise we'd have to implement a multi-pass or flag the lock
2506          * failures and retry.
2507          *
2508          * The reclamation code interlocks with the sync list's token
2509          * (by removing the vnode from the scan list) before unlocking
2510          * the inode, giving us time to ref the inode.
2511          */
2512         /*flags = VMSC_GETVP;*/
2513         flags = 0;
2514         if (waitfor & MNT_LAZY)
2515                 flags |= VMSC_ONEPASS;
2516
2517 #if 0
2518         /*
2519          * Preflush the vnodes using a normal transaction before interlocking
2520          * with a flush transaction.
2521          */
2522         hammer2_trans_init(&info.trans, pmp, 0);
2523         info.error = 0;
2524         info.waitfor = MNT_NOWAIT;
2525         vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info);
2526         hammer2_trans_done(&info.trans);
2527 #endif
2528
2529         /*
2530          * Start our flush transaction.  This does not return until all
2531          * concurrent transactions have completed and will prevent any
2532          * new transactions from running concurrently, except for the
2533          * buffer cache transactions.
2534          *
2535          * For efficiency do an async pass before making sure with a
2536          * synchronous pass on all related buffer cache buffers.  It
2537          * should theoretically not be possible for any new file buffers
2538          * to be instantiated during this sequence.
2539          */
2540         hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH |
2541                                              HAMMER2_TRANS_PREFLUSH);
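        /* finish pending deletions of unlinked-but-open files first */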
2542         hammer2_run_unlinkq(&info.trans, pmp);
2543
2544         info.error = 0;
2545         info.waitfor = MNT_NOWAIT;
2546         vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info);
2547         info.waitfor = MNT_WAIT;
2548         vsyncscan(mp, flags, hammer2_sync_scan2, &info);
2549
2550         /*
2551          * Clear PREFLUSH.  This prevents (or asserts on) any new logical
2552          * buffer cache flushes which occur during the flush.  Device buffers
2553          * are not affected.
2554          */
2555         hammer2_bioq_sync(info.trans.pmp);
2556         atomic_clear_int(&info.trans.flags, HAMMER2_TRANS_PREFLUSH);
2557
2558         total_error = 0;
2559
2560         /*
2561          * Flush all nodes to synchronize the PFSROOT subtopology to the media.
2562          *
2563          * Note that this flush will not be visible on crash recovery until
2564          * we flush the super-root topology in the next loop.
2565          */
2566         for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
2567                 chain = iroot->cluster.array[i].chain;
2568                 if (chain == NULL)
2569                         continue;
2570
2571                 hammer2_chain_ref(chain);
2572                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
2573                 if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) {
2574                         hammer2_flush(&info.trans, chain, 1);
2575                         parent = chain->parent;
2576                         KKASSERT(chain->pmp != parent->pmp);
2577                         hammer2_chain_setflush(&info.trans, parent);
2578                 }
2579                 hammer2_chain_unlock(chain);
2580                 hammer2_chain_drop(chain);
2581         }
2582         hammer2_trans_done(&info.trans);
2583
2584         /*
2585          * Flush all volume roots to synchronize PFS flushes with the
2586          * storage media volume header.  This will flush the freemap and
2587          * the superroot topology but stops when it reaches a PFSROOT
2588          * (which we already flushed above).
2589          *
2590          * This is the last step which connects the volume root to the
2591          * PFSROOT dirs flushed above.
2592          *
2593          * Each spmp (representing the hmp's super-root) requires its own
2594          * transaction.
2595          */
2596         for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
2597                 hammer2_chain_t *tmp;
2598
2599                 chain = iroot->cluster.array[i].chain;
2600                 if (chain == NULL)
2601                         continue;
2602
2603                 hmp = chain->hmp;
2604
2605                 /*
2606                  * We only have to flush each hmp once
2607                  */
2608                 for (j = i - 1; j >= 0; --j) {
2609                         if ((tmp = iroot->cluster.array[j].chain) != NULL) {
2610                                 if (tmp->hmp == hmp)
2611                                         break;
2612                         }
2613                 }
2614                 if (j >= 0)
2615                         continue;
2616
2617                 /*
2618                  * spmp transaction.  The super-root is never directly
2619                  * mounted so there shouldn't be any vnodes, let alone any
2620                  * dirty vnodes associated with it.
2621                  */
2622                 hammer2_trans_init(&info.trans, hmp->spmp,
2623                                    HAMMER2_TRANS_ISFLUSH);
2624
2625                 /*
2626                  * Media mounts have two 'roots', vchain for the topology
2627                  * and fchain for the free block table.  Flush both.
2628                  *
2629                  * Note that the topology and free block table are handled
2630                  * independently, so the free block table can wind up being
2631                  * ahead of the topology.  We depend on the bulk free scan
2632                  * code to deal with any loose ends.
2633                  */
2634                 hammer2_chain_ref(&hmp->vchain);
2635                 hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
2636                 hammer2_chain_ref(&hmp->fchain);
2637                 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
2638                 if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
2639                         /*
2640                          * This will also modify vchain as a side effect,
2641                          * mark vchain as modified now.
2642                          */
2643                         hammer2_voldata_modify(hmp);
2644                         chain = &hmp->fchain;
2645                         hammer2_flush(&info.trans, chain, 1);
2646                         KKASSERT(chain == &hmp->fchain);
2647                 }
2648                 hammer2_chain_unlock(&hmp->fchain);
2649                 hammer2_chain_unlock(&hmp->vchain);
2650                 hammer2_chain_drop(&hmp->fchain);
2651                 /* vchain dropped down below */
2652
2653                 hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
2654                 if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
2655                         chain = &hmp->vchain;
2656                         hammer2_flush(&info.trans, chain, 1);
2657                         KKASSERT(chain == &hmp->vchain);
2658                 }
2659                 hammer2_chain_unlock(&hmp->vchain);
2660                 hammer2_chain_drop(&hmp->vchain);
2661
2662                 error = 0;
2663
                /*
                 * We can't safely flush the volume header until we have
                 * flushed any device buffers which have built up.
                 *
                 * XXX this isn't being incremental
                 */
                vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
                error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
                vn_unlock(hmp->devvp);

                /*
                 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
                 * volume header needs synchronization via hmp->volsync.
                 *
                 * XXX synchronize the flag & data with only this flush XXX
                 */
                if (error == 0 &&
                    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
                        struct buf *bp;

                        /*
                         * Synchronize the disk before flushing the volume
                         * header.
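                         *
                         * A zero-length BUF_CMD_FLUSH bio asks the device
                         * to flush its write cache; the biodone_sync /
                         * BIO_SYNC setup lets us biowait for completion.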
                         */
                        bp = getpbuf(NULL);
                        bp->b_bio1.bio_offset = 0;
                        bp->b_bufsize = 0;
                        bp->b_bcount = 0;
                        bp->b_cmd = BUF_CMD_FLUSH;
                        bp->b_bio1.bio_done = biodone_sync;
                        bp->b_bio1.bio_flags |= BIO_SYNC;
                        vn_strategy(hmp->devvp, &bp->b_bio1);
                        biowait(&bp->b_bio1, "h2vol");
                        relpbuf(bp, NULL);

                        /*
                         * Then we can safely flush the version of the
                         * volume header synchronized by the flush code.
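                         *
                         * Volume headers are written round-robin across
                         * the HAMMER2_NUM_VOLHDRS copies, wrapping back
                         * to copy #0 when the next copy's zone would lie
                         * beyond the end of a small volume.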
                         */
                        i = hmp->volhdrno + 1;
                        if (i >= HAMMER2_NUM_VOLHDRS)
                                i = 0;
                        if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
                            hmp->volsync.volu_size) {
                                i = 0;
                        }
                        kprintf("sync volhdr %d %jd\n",
                                i, (intmax_t)hmp->volsync.volu_size);
                        bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
                                    HAMMER2_PBUFSIZE, 0, 0);
                        atomic_clear_int(&hmp->vchain.flags,
                                         HAMMER2_CHAIN_VOLUMESYNC);
                        bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
                        bawrite(bp);
                        hmp->volhdrno = i;
                }
                if (error)
                        total_error = error;

                hammer2_trans_done(&info.trans);        /* spmp trans */
        }
        return (total_error);
}

/*
 * Sync pass: per-vnode scan callback which flushes dirty buffers and
 * inode meta-data.
 */
static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
        struct hammer2_sync_info *info = data;
        hammer2_inode_t *ip;

        /*
         * Degenerate cases: ignore vnodes without an associated inode
         * or in a dead/bad state, clearing any dirty indication.
         */
        ip = VTOI(vp);
        if (ip == NULL) {
                vclrisdirty(vp);
                return(0);
        }
        if (vp->v_type == VNON || vp->v_type == VBAD) {
                vclrisdirty(vp);
                return(0);
        }

        /*
         * VOP_FSYNC will start a new transaction so replicate some code
         * here to do it inline (see hammer2_vop_fsync()).
         *
         * WARNING: The vfsync interacts with the buffer cache and might
         *          block; we can't hold the inode lock at that time.
         *          However, we MUST ref ip before blocking to ensure that
         *          it isn't ripped out from under us (since we do not
         *          hold a lock on the vnode).
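         *
         * MODIFIED is cleared before the vfsync so that a modification
         * racing the sync re-flags the inode rather than being lost.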
         */
        hammer2_inode_ref(ip);
        atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
        if ((ip->flags & HAMMER2_INODE_MODIFIED) ||
            !RB_EMPTY(&vp->v_rbdirty_tree)) {
                vfsync(vp, info->waitfor, 1, NULL, NULL);
        }
        if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
            RB_EMPTY(&vp->v_rbdirty_tree)) {
                vclrisdirty(vp);
        }

        hammer2_inode_drop(ip);
        return(0);
}

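/*
 * NFS export interface stubs.  These currently perform no work and
 * unconditionally return success; hammer2 does not yet implement
 * file-handle based exporting.
 */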
static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
        return (0);
}

static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
               struct fid *fhp, struct vnode **vpp)
{
        return (0);
}

static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                 int *exflagsp, struct ucred **credanonp)
{
        return (0);
}

/*
 * Support code for hammer2_vfs_mount().  Read, verify, and install the
 * volume header into the HMP.
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to
 *     nonexistent locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs
 */
static
int
hammer2_install_volume_header(hammer2_dev_t *hmp)
{
        hammer2_volume_data_t *vd;
        struct buf *bp;
        hammer2_crc32_t crc0, crc, bcrc0, bcrc;
        int error_reported;
        int error;
        int valid;
        int i;

        error_reported = 0;
        error = 0;
        valid = 0;
        bp = NULL;

        /*
         * There are up to 4 copies of the volume header (syncs iterate
         * between them so there is no single master).  We don't trust the
         * volu_size field, so we don't know precisely how large the
         * filesystem is; depend on the OS to return an error if we go
         * beyond the block device's EOF.
         */
        for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
                error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
                              HAMMER2_VOLUME_BYTES, &bp);
                if (error) {
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                vd = (struct hammer2_volume_data *) bp->b_data;
                if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
                    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
                        /* XXX: Reversed-endianness filesystem */
                        kprintf("hammer2: reverse-endian filesystem "
                                "detected\n");
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
                crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
                                      HAMMER2_VOLUME_ICRC0_SIZE);
                bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
                bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
                                       HAMMER2_VOLUME_ICRC1_SIZE);
                if ((crc0 != crc) || (bcrc0 != bcrc)) {
                        kprintf("hammer2 volume header crc "
                                "mismatch copy #%d sect0 %08x/%08x "
                                "sect1 %08x/%08x\n",
                                i, crc0, crc, bcrc0, bcrc);
                        error_reported = 1;
                        brelse(bp);
                        bp = NULL;
                        continue;
                }
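                /*
                 * Remember the CRC-valid copy with the highest
                 * mirror_tid; it becomes the installed volume header.
                 */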
                if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
                        valid = 1;
                        hmp->voldata = *vd;
                        hmp->volhdrno = i;
                }
                brelse(bp);
                bp = NULL;
        }
        if (valid) {
                hmp->volsync = hmp->voldata;
                error = 0;
                if (error_reported || bootverbose || 1) { /* 1/DEBUG */
                        kprintf("hammer2: using volume header #%d\n",
                                hmp->volhdrno);
                }
        } else {
                error = EINVAL;
                kprintf("hammer2: no valid volume headers found!\n");
        }
        return (error);
}

/*
 * This handles hysteresis on regular file flushes.  Because the BIOs are
 * routed to a thread it is possible for an excessive number to build up
 * and cause long front-end stalls well before the runningbuffspace limit
 * is hit, so we implement hammer2_flush_pipe to control the hysteresis.
 *
 * This is a particular problem when compression is used.
 */
void
hammer2_lwinprog_ref(hammer2_pfs_t *pmp)
{
        atomic_add_int(&pmp->count_lwinprog, 1);
}

void
hammer2_lwinprog_drop(hammer2_pfs_t *pmp)
{
        int lwinprog;

        lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1);
        if ((lwinprog & HAMMER2_LWINPROG_WAITING) &&
            (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) {
                atomic_clear_int(&pmp->count_lwinprog,
                                 HAMMER2_LWINPROG_WAITING);
                wakeup(&pmp->count_lwinprog);
        }
}

void
hammer2_lwinprog_wait(hammer2_pfs_t *pmp)
{
        int lwinprog;

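        /*
         * Interlocked wait: set the WAITING flag, re-test the count,
         * and only then sleep, so a wakeup from the drop path cannot
         * be lost between the test and the tsleep.
         */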
        for (;;) {
                lwinprog = pmp->count_lwinprog;
                cpu_ccfence();
                if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
                        break;
                tsleep_interlock(&pmp->count_lwinprog, 0);
                atomic_set_int(&pmp->count_lwinprog, HAMMER2_LWINPROG_WAITING);
                lwinprog = pmp->count_lwinprog;
                if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
                        break;
                tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz);
        }
}

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 */
void
hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
{
        uint32_t waiting;
        uint32_t count;
        uint32_t limit;
#if 0
        static int zzticks;
#endif

        /*
         * Atomic check condition and wait.  Also do an early speedup of
         * the syncer to try to avoid hitting the wait.
         */
        for (;;) {
                waiting = pmp->inmem_dirty_chains;
                cpu_ccfence();
                count = waiting & HAMMER2_DIRTYCHAIN_MASK;

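                /*
                 * The effective limit scales with the number of vnodes
                 * on the mount but never drops below the global
                 * hammer2_limit_dirty_chains or an absolute floor of
                 * 1000 chains.
                 */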
                limit = pmp->mp->mnt_nvnodelistsize / 10;
                if (limit < hammer2_limit_dirty_chains)
                        limit = hammer2_limit_dirty_chains;
                if (limit < 1000)
                        limit = 1000;

#if 0
                if ((int)(ticks - zzticks) > hz) {
                        zzticks = ticks;
                        kprintf("count %u %u\n", count, limit);
                }
#endif

                /*
                 * Block if there are too many dirty chains present;
                 * wait for the flush to clean some out.
                 */
                if (count > limit) {
                        tsleep_interlock(&pmp->inmem_dirty_chains, 0);
                        if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
                                               waiting,
                                       waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
                                speedup_syncer(pmp->mp);
                                tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
                                       "chnmem", hz);
                        }
                        continue;       /* loop on success or fail */
                }

                /*
                 * Try to start an early flush before we are forced to block.
                 */
                if (count > limit * 7 / 10)
                        speedup_syncer(pmp->mp);
                break;
        }
}

void
hammer2_pfs_memory_inc(hammer2_pfs_t *pmp)
{
        if (pmp) {
                atomic_add_int(&pmp->inmem_dirty_chains, 1);
        }
}

void
hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
{
        uint32_t waiting;

        if (pmp == NULL)
                return;

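        /*
         * Atomically decrement the dirty-chain count and clear the
         * WAITING flag in one cmpset loop, then wake any waiter that
         * had been flagged.
         */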
        for (;;) {
                waiting = pmp->inmem_dirty_chains;
                cpu_ccfence();
                if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
                                       waiting,
                                       (waiting - 1) &
                                        ~HAMMER2_DIRTYCHAIN_WAITING)) {
                        break;
                }
        }

        if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
                wakeup(&pmp->inmem_dirty_chains);
}

/*
 * Debugging
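 *
 * Recursively dump a chain and its children via kprintf.  *countp
 * limits the total number of chains printed and pfx tags each line.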
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
{
        hammer2_chain_t *scan;
        hammer2_chain_t *parent;

        --*countp;
        if (*countp == 0) {
                kprintf("%*.*s...\n", tab, tab, "");
                return;
        }
        if (*countp < 0)
                return;
        kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n",
                tab, tab, "", pfx,
                chain, chain->bref.type,
                chain->bref.key, chain->bref.keybits,
                chain->bref.mirror_tid);

        kprintf("%*.*s      [%08x] (%s) refs=%d\n",
                tab, tab, "",
                chain->flags,
                ((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                chain->data) ?  (char *)chain->data->ipdata.filename : "?"),
                chain->refs);

        kprintf("%*.*s      core [%08x]",
                tab, tab, "",
                chain->core.flags);

        parent = chain->parent;
        if (parent)
                kprintf("\n%*.*s      p=%p [pflags %08x prefs %d]",
                        tab, tab, "",
                        parent, parent->flags, parent->refs);
        if (RB_EMPTY(&chain->core.rbtree)) {
                kprintf("\n");
        } else {
                kprintf(" {\n");
                RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree)
                        hammer2_dump_chain(scan, tab + 4, countp, 'a');
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
                        kprintf("%*.*s}(%s)\n", tab, tab, "",
                                chain->data->ipdata.filename);
                else
                        kprintf("%*.*s}\n", tab, tab, "");
        }
}