2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.34 2008/05/06 00:21:08 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/vnode.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/nlookup.h>
44 #include <sys/fcntl.h>
45 #include <sys/sysctl.h>
/*
 * Module-wide tunables and statistics for HAMMER.  Every variable in this
 * group is published under vfs.hammer by the SYSCTL declarations in this
 * file: the hammer_count_* values are exported read-only (CTLFLAG_RD),
 * everything else read-write (CTLFLAG_RW).
 */
50 int hammer_debug_general;
51 int hammer_debug_debug;
52 int hammer_debug_inode;
53 int hammer_debug_locks;
54 int hammer_debug_btree;
56 int hammer_debug_recover; /* -1 will disable, +1 will force */
57 int hammer_debug_recover_faults;
58 int hammer_count_inodes;
59 int hammer_count_records;
60 int hammer_count_record_datas;
61 int hammer_count_volumes;
62 int hammer_count_buffers;
63 int hammer_count_nodes;
64 int hammer_count_dirtybufs; /* global */
65 int hammer_limit_dirtybufs = 100; /* per-mount */
67 int64_t hammer_contention_count;
68 int64_t hammer_zone_limit;
/*
 * sysctl tree vfs.hammer.  The debug_*, limit_dirtybufs, zone_limit and
 * contention_count nodes are writable (CTLFLAG_RW); the count_* nodes are
 * read-only (CTLFLAG_RD).  zone_limit and contention_count are 64-bit
 * (SYSCTL_QUAD), the rest are plain ints.
 *
 * NOTE(review): debug_tid points at hammer_debug_tid, which has no
 * definition in the lines visible here -- confirm it is defined elsewhere.
 */
70 SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
71 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
72 &hammer_debug_general, 0, "");
73 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_debug, CTLFLAG_RW,
74 &hammer_debug_debug, 0, "");
75 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_inode, CTLFLAG_RW,
76 &hammer_debug_inode, 0, "");
77 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_locks, CTLFLAG_RW,
78 &hammer_debug_locks, 0, "");
79 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
80 &hammer_debug_btree, 0, "");
81 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_tid, CTLFLAG_RW,
82 &hammer_debug_tid, 0, "");
83 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW,
84 &hammer_debug_recover, 0, "");
85 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW,
86 &hammer_debug_recover_faults, 0, "");
88 SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufs, CTLFLAG_RW,
89 &hammer_limit_dirtybufs, 0, "");
91 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
92 &hammer_count_inodes, 0, "");
93 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
94 &hammer_count_records, 0, "");
95 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_record_datas, CTLFLAG_RD,
96 &hammer_count_record_datas, 0, "");
97 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_volumes, CTLFLAG_RD,
98 &hammer_count_volumes, 0, "");
99 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_buffers, CTLFLAG_RD,
100 &hammer_count_buffers, 0, "");
101 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_nodes, CTLFLAG_RD,
102 &hammer_count_nodes, 0, "");
103 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_dirtybufs, CTLFLAG_RD,
104 &hammer_count_dirtybufs, 0, "");
105 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, zone_limit, CTLFLAG_RW,
106 &hammer_zone_limit, 0, "");
107 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, contention_count, CTLFLAG_RW,
108 &hammer_contention_count, 0, "");
/*
 * Forward declarations for the file-local teardown helper
 * hammer_free_hmp() and for the mount-level entry points that are
 * installed in the hammer_vfsops vector in this file.
 */
113 static void hammer_free_hmp(struct mount *mp);
115 static int hammer_vfs_mount(struct mount *mp, char *path, caddr_t data,
117 static int hammer_vfs_unmount(struct mount *mp, int mntflags);
118 static int hammer_vfs_root(struct mount *mp, struct vnode **vpp);
119 static int hammer_vfs_statfs(struct mount *mp, struct statfs *sbp,
121 static int hammer_vfs_sync(struct mount *mp, int waitfor);
122 static int hammer_vfs_vget(struct mount *mp, ino_t ino,
124 static int hammer_vfs_init(struct vfsconf *conf);
125 static int hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp,
127 static int hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp);
128 static int hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
129 int *exflagsp, struct ucred **credanonp);
/*
 * Operations vector for the hammer filesystem type.  It is registered by
 * the VFS_SET(hammer_vfsops, hammer, 0) invocation in this file; each
 * member points at the matching static hammer_vfs_*() function.
 */
132 static struct vfsops hammer_vfsops = {
133 .vfs_mount = hammer_vfs_mount,
134 .vfs_unmount = hammer_vfs_unmount,
135 .vfs_root = hammer_vfs_root,
136 .vfs_statfs = hammer_vfs_statfs,
137 .vfs_sync = hammer_vfs_sync,
138 .vfs_vget = hammer_vfs_vget,
139 .vfs_init = hammer_vfs_init,
140 .vfs_vptofh = hammer_vfs_vptofh,
141 .vfs_fhtovp = hammer_vfs_fhtovp,
142 .vfs_checkexp = hammer_vfs_checkexp
/*
 * M_HAMMER is the malloc type passed to kmalloc()/kfree() in this file for
 * the hammer_mount structure and its zbuf.  VFS_SET registers the
 * hammer_vfsops vector under the name hammer; the module version is 1.
 */
145 MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount");
147 VFS_SET(hammer_vfsops, hammer, 0);
148 MODULE_VERSION(hammer, 1);
/*
 * Filesystem-type init hook, installed as .vfs_init in hammer_vfsops.
 * The only statement visible in the body is a call to
 * hammer_init_alist_config() that has been commented out.
 */
151 hammer_vfs_init(struct vfsconf *conf)
153 /*hammer_init_alist_config();*/
/*
 * Mount entry point, installed as .vfs_mount in hammer_vfsops.
 *
 * mp    - mount structure being set up; mp->mnt_data holds the hammer_mount.
 * mntpt - mount point path (not referenced in the lines visible here).
 * data  - userspace pointer to a struct hammer_mount_info, fetched with
 *         copyin().
 *
 * Visible sequence of work:
 *  - Copy in the mount info and test info.nvolumes against the range
 *    1..32767.
 *  - With MNT_UPDATE the existing hammer_mount is taken from mp->mnt_data
 *    and asserted non-NULL.  A fresh zeroed hammer_mount is kmalloc()ed and
 *    stored in mp->mnt_data, followed by initialization of zbuf, the
 *    namekey iterator, the root B-Tree begin/end bounds, the blockmap lock,
 *    the sync/free lock refs, several TAILQs, and the per-zone limits and
 *    holes.  The branch structure separating the two cases is not fully
 *    visible here.
 *  - Per-zone limits default to HAMMER_ZONE_ENCODE(i, HAMMER_ZONE_LIMIT)
 *    and are replaced by the vfs.hammer.zone_limit sysctl value when that
 *    value is non-zero.
 *  - hflags and asof come from the mount info; the condition choosing
 *    between info.asof (with MNT_RDONLY forced) and HAMMER_MAX_TID is not
 *    visible here.
 *  - Each volume path pointer is copied in from info.volumes[], the string
 *    is copied with copyinstr() into a namei_oc objcache buffer, and the
 *    volume is installed with hammer_install_volume().
 *  - A root volume must have been found (hmp->rootvol != NULL).
 *  - Mount statistics fields and the three vnode operation vectors are set.
 *  - A reference to the root volume is obtained, the blockmap is cached,
 *    hammer_recover() is run, then f_mntfromname, f_fsid (crc32 over the
 *    two 8-byte halves of vol_fsid), next_tid and the blockmap are loaded
 *    from the root volume header.
 *  - Zone limits are lowered to vol0_zone_limit where they exceed it.
 *  - The flusher is created, the root vnode is fetched through
 *    hammer_vfs_vget(mp, 1, ...), and the root volume reference is dropped.
 *
 * NOTE(review): error-path handling and the return statement are not
 * visible in this listing -- confirm against the full source.
 */
158 hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
161 struct hammer_mount_info info;
163 hammer_volume_t rootvol;
164 struct vnode *rootvp;
165 const char *upath; /* volume name in userspace */
166 char *path; /* volume name in system space */
170 if ((error = copyin(data, &info, sizeof(info))) != 0)
172 if (info.nvolumes <= 0 || info.nvolumes >= 32768)
176 * Interal mount data structure
178 if (mp->mnt_flag & MNT_UPDATE) {
179 hmp = (void *)mp->mnt_data;
180 KKASSERT(hmp != NULL);
182 hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
183 mp->mnt_data = (qaddr_t)hmp;
185 hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK|M_ZERO);
186 hmp->namekey_iterator = mycpu->gd_time_seconds;
187 /*TAILQ_INIT(&hmp->recycle_list);*/
189 hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
190 hmp->root_btree_beg.key = -0x8000000000000000LL;
191 hmp->root_btree_beg.create_tid = 1;
192 hmp->root_btree_beg.delete_tid = 1;
193 hmp->root_btree_beg.rec_type = 0;
194 hmp->root_btree_beg.obj_type = 0;
196 hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
197 hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
198 hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
199 hmp->root_btree_end.delete_tid = 0; /* special case */
200 hmp->root_btree_end.rec_type = 0xFFFFU;
201 hmp->root_btree_end.obj_type = 0;
202 lockinit(&hmp->blockmap_lock, "blkmap", 0, 0);
204 hmp->sync_lock.refs = 1;
205 hmp->free_lock.refs = 1;
207 TAILQ_INIT(&hmp->flush_list);
208 TAILQ_INIT(&hmp->objid_cache_list);
209 TAILQ_INIT(&hmp->undo_lru_list);
212 * Set default zone limits. This value can be reduced
213 * further by the zone limit specified in the root volume.
215 * The sysctl can force a small zone limit for debugging
218 for (i = 0; i < HAMMER_MAX_ZONES; ++i) {
219 hmp->zone_limits[i] =
220 HAMMER_ZONE_ENCODE(i, HAMMER_ZONE_LIMIT);
222 if (hammer_zone_limit) {
223 hmp->zone_limits[i] =
224 HAMMER_ZONE_ENCODE(i, hammer_zone_limit);
226 hammer_init_holes(hmp, &hmp->holes[i]);
229 hmp->hflags = info.hflags;
231 mp->mnt_flag |= MNT_RDONLY;
232 hmp->asof = info.asof;
234 hmp->asof = HAMMER_MAX_TID;
238 * Re-open read-write if originally read-only, or vise-versa XXX
240 if (mp->mnt_flag & MNT_UPDATE) {
241 if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
242 kprintf("HAMMER read-write -> read-only XXX\n");
244 } else if (hmp->ronly && (mp->mnt_flag & MNT_RDONLY) == 0) {
245 kprintf("HAMMER read-only -> read-write XXX\n");
251 RB_INIT(&hmp->rb_vols_root);
252 RB_INIT(&hmp->rb_inos_root);
253 RB_INIT(&hmp->rb_nods_root);
254 RB_INIT(&hmp->rb_undo_root);
255 hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
257 TAILQ_INIT(&hmp->volu_list);
258 TAILQ_INIT(&hmp->undo_list);
259 TAILQ_INIT(&hmp->data_list);
260 TAILQ_INIT(&hmp->meta_list);
261 TAILQ_INIT(&hmp->lose_list);
266 path = objcache_get(namei_oc, M_WAITOK);
267 hmp->nvolumes = info.nvolumes;
268 for (i = 0; i < info.nvolumes; ++i) {
269 error = copyin(&info.volumes[i], &upath, sizeof(char *));
271 error = copyinstr(upath, path, MAXPATHLEN, NULL);
273 error = hammer_install_volume(hmp, path);
277 objcache_put(namei_oc, path);
280 * Make sure we found a root volume
282 if (error == 0 && hmp->rootvol == NULL) {
283 kprintf("hammer_mount: No root volume found!\n");
292 * No errors, setup enough of the mount point so we can lookup the
295 mp->mnt_iosize_max = MAXPHYS;
296 mp->mnt_kern_flag |= MNTK_FSMID;
299 * note: f_iosize is used by vnode_pager_haspage() when constructing
302 mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
303 mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;
304 mp->mnt_maxsymlinklen = 255;
305 mp->mnt_flag |= MNT_LOCAL;
307 vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
308 vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
309 vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);
312 * The root volume's ondisk pointer is only valid if we hold a
315 rootvol = hammer_get_root_volume(hmp, &error);
320 * Perform any necessary UNDO operations. The recover code does
321 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
322 * and then re-copy it again after recovery is complete.
324 * The recover code will load hmp->flusher_undo_start.
326 bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
327 sizeof(hmp->blockmap));
329 error = hammer_recover(hmp, rootvol);
331 kprintf("Failed to recover HAMMER filesystem on mount\n");
336 * Finish setup now that we have a good root volume
338 ksnprintf(mp->mnt_stat.f_mntfromname,
339 sizeof(mp->mnt_stat.f_mntfromname), "%s",
340 rootvol->ondisk->vol_name);
341 mp->mnt_stat.f_fsid.val[0] =
342 crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
343 mp->mnt_stat.f_fsid.val[1] =
344 crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
347 * Certain often-modified fields in the root volume are cached in
348 * the hammer_mount structure so we do not have to generate lots
349 * of little UNDO structures for them.
351 * Recopy after recovery.
353 hmp->next_tid = rootvol->ondisk->vol0_next_tid;
354 bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
355 sizeof(hmp->blockmap));
358 * Use the zone limit set by newfs_hammer, or the zone limit set by
359 * sysctl (for debugging), whichever is smaller.
361 if (rootvol->ondisk->vol0_zone_limit) {
362 hammer_off_t vol0_zone_limit;
364 vol0_zone_limit = rootvol->ondisk->vol0_zone_limit;
365 for (i = 0; i < HAMMER_MAX_ZONES; ++i) {
366 if (hmp->zone_limits[i] > vol0_zone_limit)
367 hmp->zone_limits[i] = vol0_zone_limit;
371 hammer_flusher_create(hmp);
374 * Locate the root directory using the root cluster's B-Tree as a
375 * starting point. The root directory uses an obj_id of 1.
377 * FUTURE: Leave the root directory cached referenced but unlocked
378 * in hmp->rootvp (need to flush it on unmount).
380 error = hammer_vfs_vget(mp, 1, &rootvp);
384 /*vn_unlock(hmp->rootvp);*/
387 hammer_rel_volume(rootvol, 0);
390 * Cleanup and return.
/*
 * Unmount entry point, installed as .vfs_unmount in hammer_vfsops.
 *
 * mntflags is tested for MNT_FORCE before the vnodes of mp are flushed
 * with vflush(mp, 0, flags); the vflush() result is captured in error and
 * tested for non-zero.
 *
 * NOTE(review): the assignment to flags, the teardown call that follows
 * the flush, and the return statements are not visible in this listing.
 */
398 hammer_vfs_unmount(struct mount *mp, int mntflags)
401 struct hammer_mount *hmp = (void *)mp->mnt_data;
407 * Clean out the vnodes
410 if (mntflags & MNT_FORCE)
412 if ((error = vflush(mp, 0, flags)) != 0)
416 * Clean up the internal mount structure and related entities. This
424 * Clean up the internal mount structure and disassociate it from the mount.
425 * This may issue I/O.
/*
 * Tear down the hammer_mount stored in mp->mnt_data.
 *
 * Visible sequence: the flusher is synced twice and then destroyed, the
 * in-memory inode tree is asserted empty, the inode tree is scanned with
 * hammer_unload_inode (MNT_WAIT passed as the callback argument), the
 * volume tree is scanned with hammer_unload_volume, MNT_LOCAL is cleared
 * on the mount, the objid cache is destroyed, zbuf is freed, the blockmap
 * lock is uninitialized, the holes of every zone are freed, and finally
 * the hammer_mount itself is freed.
 *
 * NOTE(review): some of these statements may be conditionally compiled;
 * preprocessor lines are not visible in this listing.
 */
428 hammer_free_hmp(struct mount *mp)
430 struct hammer_mount *hmp = (void *)mp->mnt_data;
435 * Clean up the root vnode
442 hammer_flusher_sync(hmp);
443 hammer_flusher_sync(hmp);
444 hammer_flusher_destroy(hmp);
446 KKASSERT(RB_EMPTY(&hmp->rb_inos_root));
450 * Unload & flush inodes
452 * XXX illegal to call this from here, it can only be done from
455 RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
456 hammer_unload_inode, (void *)MNT_WAIT);
459 * Unload & flush volumes
465 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
466 hammer_unload_volume, NULL);
469 mp->mnt_flag &= ~MNT_LOCAL;
471 hammer_destroy_objid_cache(hmp);
472 kfree(hmp->zbuf, M_HAMMER);
473 lockuninit(&hmp->blockmap_lock);
475 for (i = 0; i < HAMMER_MAX_ZONES; ++i)
476 hammer_free_holes(hmp, &hmp->holes[i]);
478 kfree(hmp, M_HAMMER);
482 * Obtain a vnode for the specified inode number. An exclusively locked
/*
 * Vnode-by-inode-number entry point, installed as .vfs_vget.
 *
 * mp  - mount whose mnt_data holds the hammer_mount.
 * ino - inode number, passed to hammer_get_inode() as the object id.
 * vpp - receives the vnode produced by hammer_get_vnode().
 *
 * Runs inside a simple transaction: the inode is looked up as of
 * hmp->asof, converted to a vnode, the inode reference is released, and
 * the transaction is closed.
 *
 * NOTE(review): the handling of a failed hammer_get_inode() and the
 * return statement are not visible in this listing.
 */
486 hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
488 struct hammer_transaction trans;
489 struct hammer_mount *hmp = (void *)mp->mnt_data;
490 struct hammer_inode *ip;
493 hammer_simple_transaction(&trans, hmp);
496 * Lookup the requested HAMMER inode. The structure must be
497 * left unlocked while we manipulate the related vnode to avoid
500 ip = hammer_get_inode(&trans, NULL, ino, hmp->asof, 0, &error);
505 error = hammer_get_vnode(ip, vpp);
506 hammer_rel_inode(ip, 0);
507 hammer_done_transaction(&trans);
512 * Return the root vnode for the filesystem.
514 * HAMMER stores the root vnode in the hammer_mount structure so
515 * getting it is easy.
/*
 * Root-vnode entry point, installed as .vfs_root.  The visible code
 * obtains the root vnode by calling hammer_vfs_vget() with inode number 1
 * and stores the result code in error.
 */
518 hammer_vfs_root(struct mount *mp, struct vnode **vpp)
521 struct hammer_mount *hmp = (void *)mp->mnt_data;
525 error = hammer_vfs_vget(mp, 1, vpp);
/*
 * statfs entry point, installed as .vfs_statfs.
 *
 * Refreshes mp->mnt_stat from the root volume header: f_files is loaded
 * from vol0_stat_inodes and clamped to zero when negative; f_bfree is
 * vol0_stat_freebigblocks * HAMMER_LARGEBLOCK_SIZE expressed in units of
 * HAMMER_BUFSIZE, and f_bavail is set equal to f_bfree.  The root volume
 * reference is dropped once the header fields have been read.
 *
 * NOTE(review): how the result reaches sbp, and the error path after
 * hammer_get_root_volume(), are not visible in this listing.
 */
530 hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
532 struct hammer_mount *hmp = (void *)mp->mnt_data;
533 hammer_volume_t volume;
534 hammer_volume_ondisk_t ondisk;
538 volume = hammer_get_root_volume(hmp, &error);
541 ondisk = volume->ondisk;
546 mp->mnt_stat.f_files = ondisk->vol0_stat_inodes;
547 bfree = ondisk->vol0_stat_freebigblocks * HAMMER_LARGEBLOCK_SIZE;
548 hammer_rel_volume(volume, 0);
550 mp->mnt_stat.f_bfree = bfree / HAMMER_BUFSIZE;
551 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
552 if (mp->mnt_stat.f_files < 0)
553 mp->mnt_stat.f_files = 0;
560 * Sync the filesystem. Currently we have to run it twice, the second
561 * one will advance the undo start index to the end index, so if a crash
562 * occurs no undos will be run on mount.
564 * We do not sync the filesystem if we are called from a panic. If we did
565 * we might end up blowing up a sync that was already in progress.
/*
 * Sync entry point, installed as .vfs_sync.
 *
 * When panicstr is NULL, hammer_sync_hmp() is invoked with the caller
 * supplied waitfor value; a second identical call is visible after the
 * first.
 *
 * NOTE(review): the condition guarding the second call, the behavior when
 * panicstr is set, and the return statement are not visible here.
 */
568 hammer_vfs_sync(struct mount *mp, int waitfor)
570 struct hammer_mount *hmp = (void *)mp->mnt_data;
573 if (panicstr == NULL) {
574 error = hammer_sync_hmp(hmp, waitfor);
576 error = hammer_sync_hmp(hmp, waitfor);
585 * Convert a vnode to a file handle.
/*
 * vnode-to-file-handle entry point, installed as .vfs_vptofh.
 *
 * Asserts that MAXFIDSZ is at least 16, sets fid_len to the offset of
 * fid_data[16] within struct fid, zeroes fid_reserved, and copies
 * ip->obj_id to fid_data + 0 and ip->obj_asof to fid_data + 8.
 *
 * NOTE(review): the layout presumes obj_id and obj_asof are 8 bytes each;
 * the declaration of ip is not visible in this listing.
 */
588 hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp)
592 KKASSERT(MAXFIDSZ >= 16);
594 fhp->fid_len = offsetof(struct fid, fid_data[16]);
595 fhp->fid_reserved = 0;
596 bcopy(&ip->obj_id, fhp->fid_data + 0, sizeof(ip->obj_id));
597 bcopy(&ip->obj_asof, fhp->fid_data + 8, sizeof(ip->obj_asof));
603 * Convert a file handle back to a vnode.
/*
 * File-handle-to-vnode entry point, installed as .vfs_fhtovp.
 *
 * Reads obj_id from fid_data + 0 and obj_asof from fid_data + 8 (the same
 * offsets written by hammer_vfs_vptofh), then, inside a simple
 * transaction, looks the inode up with hammer_get_inode(), converts it to
 * a vnode with hammer_get_vnode(), releases the inode reference, and
 * closes the transaction.
 *
 * NOTE(review): no check of fhp->fid_len is visible in this listing, and
 * the handling of a failed inode lookup is not visible either.
 */
606 hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
608 struct hammer_transaction trans;
609 struct hammer_inode *ip;
610 struct hammer_inode_info info;
613 bcopy(fhp->fid_data + 0, &info.obj_id, sizeof(info.obj_id));
614 bcopy(fhp->fid_data + 8, &info.obj_asof, sizeof(info.obj_asof));
616 hammer_simple_transaction(&trans, (void *)mp->mnt_data);
619 * Get/allocate the hammer_inode structure. The structure must be
620 * unlocked while we manipulate the related vnode to avoid a
623 ip = hammer_get_inode(&trans, NULL, info.obj_id, info.obj_asof,
629 error = hammer_get_vnode(ip, vpp);
630 hammer_rel_inode(ip, 0);
631 hammer_done_transaction(&trans);
/*
 * Export-check entry point, installed as .vfs_checkexp.
 *
 * Looks nam up in the export list of the mount (hmp->export) through
 * vfs_export_lookup().  From the resulting entry, *exflagsp receives
 * netc_exflags and *credanonp receives the address of netc_anon.
 *
 * NOTE(review): the test for a failed lookup and the return values are
 * not visible in this listing.
 */
636 hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
637 int *exflagsp, struct ucred **credanonp)
639 hammer_mount_t hmp = (void *)mp->mnt_data;
643 np = vfs_export_lookup(mp, &hmp->export, nam);
645 *exflagsp = np->netc_exflags;
646 *credanonp = &np->netc_anon;
656 hammer_vfs_export(struct mount *mp, int op, const struct export_args *export)
658 hammer_mount_t hmp = (void *)mp->mnt_data;
662 case MOUNTCTL_SET_EXPORT:
663 error = vfs_export(mp, &hmp->export, export);