HAMMER 28/many: Implement zoned blockmap
[dragonfly.git] / sys / vfs / hammer / hammer_vfsops.c
CommitLineData
427e5fc6
MD
1/*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
40043e7f 34 * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.19 2008/02/10 09:51:01 dillon Exp $
427e5fc6
MD
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/vnode.h>
41#include <sys/mount.h>
42#include <sys/malloc.h>
43#include <sys/nlookup.h>
44#include <sys/fcntl.h>
b3deaf57 45#include <sys/sysctl.h>
427e5fc6
MD
46#include <sys/buf.h>
47#include <sys/buf2.h>
48#include "hammer.h"
49
d5ef456e 50int hammer_debug_general;
b3deaf57 51int hammer_debug_btree;
d113fda1 52int hammer_debug_tid;
46fe7ae1
MD
53int hammer_debug_recover; /* -1 will disable, +1 will force */
54int hammer_debug_recover_faults;
b3deaf57
MD
55int hammer_count_inodes;
56int hammer_count_records;
57int hammer_count_record_datas;
58int hammer_count_volumes;
b3deaf57
MD
59int hammer_count_buffers;
60int hammer_count_nodes;
b3deaf57
MD
61
62SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
d5ef456e
MD
63SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
64 &hammer_debug_general, 0, "");
b3deaf57
MD
65SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
66 &hammer_debug_btree, 0, "");
d113fda1
MD
67SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_tid, CTLFLAG_RW,
68 &hammer_debug_tid, 0, "");
b33e2cc0
MD
69SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW,
70 &hammer_debug_recover, 0, "");
46fe7ae1
MD
71SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW,
72 &hammer_debug_recover_faults, 0, "");
b3deaf57
MD
73SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
74 &hammer_count_inodes, 0, "");
75SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
76 &hammer_count_records, 0, "");
77SYSCTL_INT(_vfs_hammer, OID_AUTO, count_record_datas, CTLFLAG_RD,
78 &hammer_count_record_datas, 0, "");
79SYSCTL_INT(_vfs_hammer, OID_AUTO, count_volumes, CTLFLAG_RD,
80 &hammer_count_volumes, 0, "");
b3deaf57
MD
81SYSCTL_INT(_vfs_hammer, OID_AUTO, count_buffers, CTLFLAG_RD,
82 &hammer_count_buffers, 0, "");
83SYSCTL_INT(_vfs_hammer, OID_AUTO, count_nodes, CTLFLAG_RD,
84 &hammer_count_nodes, 0, "");
b3deaf57 85
427e5fc6
MD
86/*
87 * VFS ABI
88 */
89static void hammer_free_hmp(struct mount *mp);
90
91static int hammer_vfs_mount(struct mount *mp, char *path, caddr_t data,
92 struct ucred *cred);
93static int hammer_vfs_unmount(struct mount *mp, int mntflags);
94static int hammer_vfs_root(struct mount *mp, struct vnode **vpp);
95static int hammer_vfs_statfs(struct mount *mp, struct statfs *sbp,
96 struct ucred *cred);
97static int hammer_vfs_sync(struct mount *mp, int waitfor);
513ca7d7
MD
98static int hammer_vfs_vget(struct mount *mp, ino_t ino,
99 struct vnode **vpp);
427e5fc6 100static int hammer_vfs_init(struct vfsconf *conf);
513ca7d7
MD
101static int hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp,
102 struct vnode **vpp);
103static int hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp);
104static int hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
105 int *exflagsp, struct ucred **credanonp);
106
427e5fc6
MD
107
108static struct vfsops hammer_vfsops = {
109 .vfs_mount = hammer_vfs_mount,
110 .vfs_unmount = hammer_vfs_unmount,
111 .vfs_root = hammer_vfs_root,
112 .vfs_statfs = hammer_vfs_statfs,
113 .vfs_sync = hammer_vfs_sync,
114 .vfs_vget = hammer_vfs_vget,
513ca7d7
MD
115 .vfs_init = hammer_vfs_init,
116 .vfs_vptofh = hammer_vfs_vptofh,
117 .vfs_fhtovp = hammer_vfs_fhtovp,
118 .vfs_checkexp = hammer_vfs_checkexp
427e5fc6
MD
119};
120
121MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount");
122
123VFS_SET(hammer_vfsops, hammer, 0);
124MODULE_VERSION(hammer, 1);
125
/*
 * Filesystem-type initialization hook (vfs_init).  There is currently
 * nothing to set up, so this always succeeds.
 */
static int
hammer_vfs_init(struct vfsconf *conf)
{
	return (0);
}
132
133static int
134hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
135 struct ucred *cred)
136{
137 struct hammer_mount_info info;
a89aec1b
MD
138 hammer_mount_t hmp;
139 hammer_volume_t rootvol;
27ea2398 140 struct vnode *rootvp;
427e5fc6
MD
141 const char *upath; /* volume name in userspace */
142 char *path; /* volume name in system space */
143 int error;
144 int i;
145
146 if ((error = copyin(data, &info, sizeof(info))) != 0)
147 return (error);
148 if (info.nvolumes <= 0 || info.nvolumes >= 32768)
149 return (EINVAL);
150
151 /*
152 * Interal mount data structure
153 */
195c19a1
MD
154 if (mp->mnt_flag & MNT_UPDATE) {
155 hmp = (void *)mp->mnt_data;
156 KKASSERT(hmp != NULL);
157 } else {
158 hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
159 mp->mnt_data = (qaddr_t)hmp;
160 hmp->mp = mp;
161 hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK|M_ZERO);
162 hmp->namekey_iterator = mycpu->gd_time_seconds;
46fe7ae1 163 /*TAILQ_INIT(&hmp->recycle_list);*/
47197d71
MD
164
165 hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
166 hmp->root_btree_beg.key = -0x8000000000000000LL;
167 hmp->root_btree_beg.create_tid = 1;
168 hmp->root_btree_beg.delete_tid = 1;
169 hmp->root_btree_beg.rec_type = 0;
170 hmp->root_btree_beg.obj_type = 0;
171
172 hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
173 hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
174 hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
175 hmp->root_btree_end.delete_tid = 0; /* special case */
176 hmp->root_btree_end.rec_type = 0xFFFFU;
177 hmp->root_btree_end.obj_type = 0;
40043e7f 178 lockinit(&hmp->blockmap_lock, "blkmap", 0, 0);
195c19a1
MD
179 }
180 hmp->hflags = info.hflags;
7f7c1f84
MD
181 if (info.asof) {
182 mp->mnt_flag |= MNT_RDONLY;
183 hmp->asof = info.asof;
184 } else {
185 hmp->asof = HAMMER_MAX_TID;
186 }
195c19a1
MD
187
188 /*
189 * Re-open read-write if originally read-only, or vise-versa XXX
190 */
191 if (mp->mnt_flag & MNT_UPDATE) {
192 if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193 kprintf("HAMMER read-write -> read-only XXX\n");
194 hmp->ronly = 1;
195 } else if (hmp->ronly && (mp->mnt_flag & MNT_RDONLY) == 0) {
196 kprintf("HAMMER read-only -> read-write XXX\n");
197 hmp->ronly = 0;
198 }
199 return(0);
200 }
201
427e5fc6
MD
202 RB_INIT(&hmp->rb_vols_root);
203 RB_INIT(&hmp->rb_inos_root);
40043e7f 204 RB_INIT(&hmp->rb_nods_root);
195c19a1 205 hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
427e5fc6
MD
206
207 /*
208 * Load volumes
209 */
210 path = objcache_get(namei_oc, M_WAITOK);
d26d0ae9 211 hmp->nvolumes = info.nvolumes;
427e5fc6
MD
212 for (i = 0; i < info.nvolumes; ++i) {
213 error = copyin(&info.volumes[i], &upath, sizeof(char *));
214 if (error == 0)
215 error = copyinstr(upath, path, MAXPATHLEN, NULL);
216 if (error == 0)
8cd0a023 217 error = hammer_install_volume(hmp, path);
427e5fc6
MD
218 if (error)
219 break;
220 }
221 objcache_put(namei_oc, path);
222
223 /*
224 * Make sure we found a root volume
225 */
226 if (error == 0 && hmp->rootvol == NULL) {
227 kprintf("hammer_mount: No root volume found!\n");
228 error = EINVAL;
229 }
427e5fc6
MD
230 if (error) {
231 hammer_free_hmp(mp);
232 return (error);
233 }
234
235 /*
27ea2398
MD
236 * No errors, setup enough of the mount point so we can lookup the
237 * root vnode.
427e5fc6 238 */
27ea2398
MD
239 mp->mnt_iosize_max = MAXPHYS;
240 mp->mnt_kern_flag |= MNTK_FSMID;
c0ade690
MD
241
242 /*
243 * note: f_iosize is used by vnode_pager_haspage() when constructing
244 * its VOP_BMAP call.
245 */
246 mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
fbc6e32a 247 mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;
27ea2398
MD
248 mp->mnt_maxsymlinklen = 255;
249 mp->mnt_flag |= MNT_LOCAL;
250
251 vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
7a04d74f
MD
252 vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
253 vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);
27ea2398 254
a89aec1b
MD
255 /*
256 * The root volume's ondisk pointer is only valid if we hold a
257 * reference to it.
258 */
259 rootvol = hammer_get_root_volume(hmp, &error);
260 if (error)
261 goto done;
262 ksnprintf(mp->mnt_stat.f_mntfromname,
263 sizeof(mp->mnt_stat.f_mntfromname), "%s",
264 rootvol->ondisk->vol_name);
513ca7d7
MD
265 mp->mnt_stat.f_fsid.val[0] =
266 crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
267 mp->mnt_stat.f_fsid.val[1] =
268 crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
a89aec1b
MD
269 hammer_rel_volume(rootvol, 0);
270
27ea2398
MD
271 /*
272 * Locate the root directory using the root cluster's B-Tree as a
273 * starting point. The root directory uses an obj_id of 1.
274 *
275 * FUTURE: Leave the root directory cached referenced but unlocked
276 * in hmp->rootvp (need to flush it on unmount).
277 */
278 error = hammer_vfs_vget(mp, 1, &rootvp);
a89aec1b
MD
279 if (error)
280 goto done;
281 vput(rootvp);
27ea2398 282 /*vn_unlock(hmp->rootvp);*/
427e5fc6 283
a89aec1b 284done:
427e5fc6 285 /*
27ea2398 286 * Cleanup and return.
427e5fc6 287 */
27ea2398 288 if (error)
427e5fc6 289 hammer_free_hmp(mp);
427e5fc6
MD
290 return (error);
291}
292
293static int
294hammer_vfs_unmount(struct mount *mp, int mntflags)
295{
296#if 0
297 struct hammer_mount *hmp = (void *)mp->mnt_data;
298#endif
299 int flags;
66325755 300 int error;
27ea2398 301
427e5fc6
MD
302 /*
303 * Clean out the vnodes
304 */
66325755
MD
305 flags = 0;
306 if (mntflags & MNT_FORCE)
307 flags |= FORCECLOSE;
308 if ((error = vflush(mp, 0, flags)) != 0)
309 return (error);
427e5fc6
MD
310
311 /*
312 * Clean up the internal mount structure and related entities. This
313 * may issue I/O.
314 */
315 hammer_free_hmp(mp);
316 return(0);
317}
318
319/*
320 * Clean up the internal mount structure and disassociate it from the mount.
321 * This may issue I/O.
322 */
323static void
324hammer_free_hmp(struct mount *mp)
325{
326 struct hammer_mount *hmp = (void *)mp->mnt_data;
327
27ea2398 328#if 0
427e5fc6
MD
329 /*
330 * Clean up the root vnode
331 */
332 if (hmp->rootvp) {
333 vrele(hmp->rootvp);
334 hmp->rootvp = NULL;
335 }
27ea2398 336#endif
427e5fc6
MD
337
338 /*
339 * Unload & flush inodes
340 */
341 RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
d113fda1 342 hammer_unload_inode, (void *)MNT_WAIT);
427e5fc6
MD
343
344 /*
345 * Unload & flush volumes
346 */
347 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
348 hammer_unload_volume, NULL);
349
350 mp->mnt_data = NULL;
66325755 351 mp->mnt_flag &= ~MNT_LOCAL;
427e5fc6 352 hmp->mp = NULL;
66325755 353 kfree(hmp->zbuf, M_HAMMER);
40043e7f 354 lockuninit(&hmp->blockmap_lock);
427e5fc6
MD
355 kfree(hmp, M_HAMMER);
356}
357
513ca7d7
MD
358/*
359 * Obtain a vnode for the specified inode number. An exclusively locked
360 * vnode is returned.
361 */
362int
363hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
364{
365 struct hammer_mount *hmp = (void *)mp->mnt_data;
366 struct hammer_inode *ip;
367 int error;
368
369 /*
370 * Get/allocate the hammer_inode structure. The structure must be
371 * unlocked while we manipulate the related vnode to avoid a
372 * deadlock.
373 */
374 ip = hammer_get_inode(hmp, NULL, ino, hmp->asof, 0, &error);
375 if (ip == NULL) {
376 *vpp = NULL;
377 return(error);
378 }
379 error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
380 hammer_rel_inode(ip, 0);
381 return (error);
382}
383
427e5fc6
MD
/*
 * Return the root vnode for the filesystem.
 *
 * The root vnode is not cached in the hammer_mount structure; it is
 * looked up on every call through hammer_vfs_vget() using obj_id 1, and
 * that call's result is returned unchanged.
 */
static int
hammer_vfs_root(struct mount *mp, struct vnode **vpp)
{
	return (hammer_vfs_vget(mp, 1, vpp));
}
401
402static int
403hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
404{
fbc6e32a
MD
405 struct hammer_mount *hmp = (void *)mp->mnt_data;
406 hammer_volume_t volume;
407 hammer_volume_ondisk_t ondisk;
408 int error;
47197d71 409 int64_t bfree;
fbc6e32a
MD
410
411 volume = hammer_get_root_volume(hmp, &error);
412 if (error)
413 return(error);
fbc6e32a
MD
414 ondisk = volume->ondisk;
415
47197d71
MD
416 /*
417 * Basic stats
418 */
fbc6e32a 419 mp->mnt_stat.f_files = ondisk->vol0_stat_inodes;
40043e7f
MD
420 bfree = 0;
421 hammer_rel_volume(volume, 0);
422#if 0
47197d71
MD
423 fifo_beg = ondisk->vol0_fifo_beg;
424 fifo_end = ondisk->vol0_fifo_end;
47197d71
MD
425
426 /*
427 * Calculate how many free blocks we have by counting the
428 * blocks between fifo_end and fifo_beg.
429 */
430 bfree = 0;
431 vol_no = HAMMER_VOL_DECODE(fifo_end);
432 for (;;) {
433 if (vol_no == HAMMER_VOL_DECODE(fifo_beg) &&
434 fifo_end <= fifo_beg) {
435 bfree += (fifo_beg - fifo_end) & HAMMER_OFF_SHORT_MASK;
436 break;
437 }
438 volume = hammer_get_volume(hmp, vol_no, &error);
439 if (volume == NULL)
440 break;
441 bfree += volume->maxbuf_off - fifo_end;
442 if (++vol_no == hmp->nvolumes)
443 vol_no = 0;
444 fifo_end = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0);
445 hammer_rel_volume(volume, 0);
446 }
40043e7f 447#endif
47197d71
MD
448 mp->mnt_stat.f_bfree = bfree / HAMMER_BUFSIZE;
449 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
fbc6e32a
MD
450 if (mp->mnt_stat.f_files < 0)
451 mp->mnt_stat.f_files = 0;
452
27ea2398
MD
453 *sbp = mp->mnt_stat;
454 return(0);
427e5fc6
MD
455}
456
457static int
458hammer_vfs_sync(struct mount *mp, int waitfor)
459{
fbc6e32a
MD
460 struct hammer_mount *hmp = (void *)mp->mnt_data;
461 int error;
462
463 error = hammer_sync_hmp(hmp, waitfor);
464 return(error);
427e5fc6
MD
465}
466
513ca7d7
MD
467/*
468 * Convert a vnode to a file handle.
469 */
470static int
471hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp)
472{
473 hammer_inode_t ip;
474
475 KKASSERT(MAXFIDSZ >= 16);
476 ip = VTOI(vp);
477 fhp->fid_len = offsetof(struct fid, fid_data[16]);
478 fhp->fid_reserved = 0;
479 bcopy(&ip->obj_id, fhp->fid_data + 0, sizeof(ip->obj_id));
480 bcopy(&ip->obj_asof, fhp->fid_data + 8, sizeof(ip->obj_asof));
481 return(0);
482}
483
484
485/*
486 * Convert a file handle back to a vnode.
487 */
488static int
489hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
490{
491 struct hammer_mount *hmp = (void *)mp->mnt_data;
492 struct hammer_inode *ip;
493 struct hammer_inode_info info;
494 int error;
495
496 bcopy(fhp->fid_data + 0, &info.obj_id, sizeof(info.obj_id));
497 bcopy(fhp->fid_data + 8, &info.obj_asof, sizeof(info.obj_asof));
498
499 /*
500 * Get/allocate the hammer_inode structure. The structure must be
501 * unlocked while we manipulate the related vnode to avoid a
502 * deadlock.
503 */
504 ip = hammer_get_inode(hmp, NULL, info.obj_id, info.obj_asof, 0, &error);
505 if (ip == NULL) {
506 *vpp = NULL;
507 return(error);
508 }
509 error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
510 hammer_rel_inode(ip, 0);
511 return (error);
512}
513
514static int
515hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
516 int *exflagsp, struct ucred **credanonp)
517{
518 hammer_mount_t hmp = (void *)mp->mnt_data;
519 struct netcred *np;
520 int error;
521
522 np = vfs_export_lookup(mp, &hmp->export, nam);
523 if (np) {
524 *exflagsp = np->netc_exflags;
525 *credanonp = &np->netc_anon;
526 error = 0;
527 } else {
528 error = EACCES;
529 }
530 return (error);
531
532}
533
534int
535hammer_vfs_export(struct mount *mp, int op, const struct export_args *export)
536{
537 hammer_mount_t hmp = (void *)mp->mnt_data;
538 int error;
539
540 switch(op) {
541 case MOUNTCTL_SET_EXPORT:
542 error = vfs_export(mp, &hmp->export, export);
543 break;
544 default:
545 error = EOPNOTSUPP;
546 break;
547 }
548 return(error);
549}
550