 * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 #include <sys/dirent.h>
49 #define ZFOFFSET (-2LL)
/*
 * hammer2_vop_inactive() -- last reference to the vnode is going away but
 * the vnode is still cached.
 *
 * NOTE(review): this chunk is garbled -- stray original line numbers are
 * fused into the text and intervening lines (braces, locals, body) are
 * missing.  Code text kept byte-identical; comments only added.
 */
52 * Last reference to a vnode is going away but it is still cached.
56 hammer2_vop_inactive(struct vop_inactive_args *ap)
59 struct hammer2_inode *ip;
61 struct hammer2_mount *hmp;
/*
 * hammer2_vop_reclaim() -- detach the in-memory inode from the vnode.
 * Visible sequence: lock the inode exclusively, flush its chain, unlock,
 * then drop the chain reference that the vnode held.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
79 * Reclaim a vnode so that it can be reused; after the inode is
80 * disassociated, the filesystem must manage it alone.
84 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
86 struct hammer2_inode *ip;
87 struct hammer2_mount *hmp;
96 hammer2_inode_lock_ex(ip);
99 hammer2_chain_flush(hmp, &ip->chain, NULL);
100 hammer2_inode_unlock_ex(ip);
101 hammer2_chain_drop(hmp, &ip->chain); /* vp ref removed */
104 * XXX handle background sync when ip dirty, kernel will no longer
105 * notify us regarding this inode because there is no longer a
106 * vnode attached to it.
/*
 * hammer2_vop_fsync() -- synchronize the vnode's dirty buffers via
 * vfsync(), then flush the inode's chain, all under the exclusive
 * inode lock.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
114 hammer2_vop_fsync(struct vop_fsync_args *ap)
116 struct hammer2_inode *ip;
117 struct hammer2_mount *hmp;
124 hammer2_inode_lock_ex(ip);
125 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
126 hammer2_chain_flush(hmp, &ip->chain, NULL);
127 hammer2_inode_unlock_ex(ip);
133 hammer2_vop_access(struct vop_access_args *ap)
135 hammer2_inode_t *ip = VTOI(ap->a_vp);
140 uid = hammer2_to_unix_xid(&ip->ip_data.uid);
141 gid = hammer2_to_unix_xid(&ip->ip_data.gid);
143 error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
/*
 * hammer2_vop_getattr() -- fill in *vap from the inode's media data while
 * holding the shared inode lock.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
150 hammer2_vop_getattr(struct vop_getattr_args *ap)
152 hammer2_mount_t *hmp;
163 hammer2_inode_lock_sh(ip);
165 vap->va_fsid = hmp->mp->mnt_stat.f_fsid.val[0];
166 vap->va_fileid = ip->ip_data.inum;
167 vap->va_mode = ip->ip_data.mode;
168 vap->va_nlink = ip->ip_data.nlinks;
173 vap->va_size = ip->ip_data.size;
174 vap->va_blocksize = HAMMER2_PBUFSIZE;
175 vap->va_flags = ip->ip_data.uflags;
176 hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
177 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
/*
 * NOTE(review): va_atime is populated from mtime below -- presumably
 * atime is not tracked on-media at this stage; confirm intentional.
 */
178 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
180 vap->va_bytes = vap->va_size;
181 vap->va_type = hammer2_get_vtype(ip);
183 vap->va_uid_uuid = ip->ip_data.uid;
184 vap->va_gid_uuid = ip->ip_data.gid;
185 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
188 hammer2_inode_unlock_sh(ip);
/*
 * hammer2_vop_readdir() -- emit directory entries into the uio, with
 * optional cookie support for NFS.  Artificial '.'/'..' entries are
 * emitted first, then the real entries are scanned via
 * hammer2_chain_lookup()/hammer2_chain_next().
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical, comments only added/corrected.
 */
195 hammer2_vop_readdir(struct vop_readdir_args *ap)
197 hammer2_mount_t *hmp;
199 hammer2_inode_t *xip;
200 hammer2_chain_t *parent;
201 hammer2_chain_t *chain;
215 saveoff = uio->uio_offset;
218 * Set up directory entry cookies if requested.
220 if (ap->a_ncookies) {
/* heuristic: assume roughly 16 bytes of uio space per entry */
221 ncookies = uio->uio_resid / 16 + 1;
224 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
232 * Handle artificial entries. To ensure that only positive 64 bit
233 * quantities are returned to userland we always strip off bit 63.
234 * The hash code is designed such that codes 0x0000-0x7FFF are not
235 * used, allowing us to use these codes for articial entries.
237 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
238 * allow '..' to cross the mount point into (e.g.) the super-root.
241 chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */
244 r = vop_write_dirent(&error, uio,
246 HAMMER2_DIRHASH_USERMSK,
251 cookies[cookie_index] = saveoff;
254 if (cookie_index == ncookies)
/* '..' must not escape the mount into the super-root */
258 if (ip->pip == NULL || ip == hmp->iroot)
263 r = vop_write_dirent(&error, uio,
265 HAMMER2_DIRHASH_USERMSK,
270 cookies[cookie_index] = saveoff;
273 if (cookie_index == ncookies)
277 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
280 hammer2_chain_ref(hmp, parent);
281 error = hammer2_chain_lock(hmp, parent);
283 hammer2_chain_put(hmp, parent);
286 chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey, 0);
288 chain = hammer2_chain_lookup(hmp, &parent,
289 lkey, (hammer2_key_t)-1, 0);
292 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
293 dtype = hammer2_get_dtype(chain->u.ip);
294 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
295 r = vop_write_dirent(&error, uio,
296 chain->u.ip->ip_data.inum &
297 HAMMER2_DIRHASH_USERMSK,
298 dtype, chain->u.ip->ip_data.name_len,
299 chain->u.ip->ip_data.filename);
303 cookies[cookie_index] = saveoff;
306 /* XXX chain error */
307 kprintf("bad chain type readdir %d\n",
312 * Keys may not be returned in order so once we have a
313 * placemarker (chain) the scan must allow the full range
314 * or some entries will be missed.
316 chain = hammer2_chain_next(hmp, &parent, chain,
317 0, (hammer2_key_t)-1, 0);
319 saveoff = (chain->bref.key &
320 HAMMER2_DIRHASH_USERMSK) + 1;
322 saveoff = (hammer2_key_t)-1;
324 if (cookie_index == ncookies)
327 hammer2_chain_put(hmp, parent);
329 hammer2_chain_put(hmp, chain);
332 *ap->a_eofflag = (chain == NULL);
333 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
/* on early error with no entries emitted, release the cookie array */
334 if (error && cookie_index == 0) {
336 kfree(cookies, M_TEMP);
338 *ap->a_cookies = NULL;
342 *ap->a_ncookies = cookie_index;
343 *ap->a_cookies = cookies;
/*
 * hammer2_vop_read() -- buffered read of a regular file.  Iterates
 * logical buffers via cluster_read(), clamping each copy to the buffer
 * remainder, the uio residual, and file EOF.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
351 hammer2_vop_read(struct vop_read_args *ap)
354 hammer2_mount_t *hmp;
363 * Read operations supported on this vnode?
366 if (vp->v_type != VREG)
/* upper 16 bits of a_ioflag carry the sequential-access heuristic */
377 seqcount = ap->a_ioflag >> 16;
378 bigread = (uio->uio_resid > 100 * 1024 * 1024);
383 while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
384 hammer2_key_t off_hi;
388 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
389 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
391 /* XXX bigread & signal check test */
393 error = cluster_read(vp, ip->ip_data.size, off_hi,
394 HAMMER2_LBUFSIZE, HAMMER2_PBUFSIZE,
395 seqcount * BKVASIZE, &bp);
/* clamp n to buffer remainder, uio residual, and EOF */
398 n = HAMMER2_LBUFSIZE - off_lo;
399 if (n > uio->uio_resid)
401 if (n > ip->ip_data.size - uio->uio_offset)
402 n = (int)(ip->ip_data.size - uio->uio_offset);
403 bp->b_flags |= B_AGE;
404 uiomove((char *)bp->b_data + off_lo, n, uio);
/*
 * hammer2_vop_write() -- buffered write of a regular file.  Checks the
 * RLIMIT_FSIZE resource limit, extends the file via nvextendbuf() when
 * needed, obtains/reads the logical buffer appropriately for full vs
 * partial overwrites, copies user data in, and dirties the buffer.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical, comments only added/corrected.
 */
412 hammer2_vop_write(struct vop_write_args *ap)
416 hammer2_mount_t *hmp;
426 * Write operations supported on this vnode?
429 if (vp->v_type != VREG)
443 seqcount = ap->a_ioflag >> 16;
444 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
447 * Check resource limit
449 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
450 uio->uio_offset + uio->uio_resid >
451 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
452 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
/* NOTE(review): bigwrite is recomputed identically here (see above) --
 * one of the two assignments is redundant; confirm and remove one. */
456 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
461 while (uio->uio_resid > 0) {
463 hammer2_key_t off_hi;
470 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
471 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
473 n = HAMMER2_LBUFSIZE - off_lo;
474 if (n > uio->uio_resid) {
480 nsize = uio->uio_offset + n;
482 /* XXX bigwrite & signal check test */
485 * Don't allow the buffer build to blow out the buffer
488 if ((ap->a_ioflag & IO_RECURSE) == 0)
489 bwillwrite(HAMMER2_LBUFSIZE);
492 * Extend the size of the file as needed
495 if (nsize > ip->ip_data.size) {
496 if (uio->uio_offset > ip->ip_data.size)
500 nvextendbuf(vp, ip->ip_data.size, nsize,
501 HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE,
502 (int)(ip->ip_data.size & HAMMER2_LBUFMASK),
505 kflags |= NOTE_EXTEND;
511 if (uio->uio_segflg == UIO_NOCOPY) {
513 * Issuing a write with the same data backing the
514 * buffer. Instantiate the buffer to collect the
515 * backing vm pages, then read-in any missing bits.
517 * This case is used by vop_stdputpages().
519 bp = getblk(vp, off_hi,
520 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
521 if ((bp->b_flags & B_CACHE) == 0) {
523 error = bread(ap->a_vp,
524 off_hi, HAMMER2_LBUFSIZE, &bp);
526 } else if (off_lo == 0 && uio->uio_resid >= HAMMER2_LBUFSIZE) {
528 * Even though we are entirely overwriting the buffer
529 * we may still have to zero it out to avoid a
530 * mmap/write visibility issue.
532 bp = getblk(vp, off_hi,
533 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
534 if ((bp->b_flags & B_CACHE) == 0)
536 } else if (off_hi >= ip->ip_data.size) {
538 * If the base offset of the buffer is beyond the
539 * file EOF, we don't have to issue a read.
541 bp = getblk(vp, off_hi,
542 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
546 * Partial overwrite, read in any missing bits then
547 * replace the portion being written.
549 error = bread(vp, off_hi, HAMMER2_LBUFSIZE, &bp);
556 error = uiomove(bp->b_data + off_lo, n, uio);
563 nvtruncbuf(vp, ip->ip_data.size,
564 HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE);
568 kflags |= NOTE_WRITE;
569 if (ip->ip_data.size < uio->uio_offset)
570 ip->ip_data.size = uio->uio_offset;
571 /* XXX update ino_data.mtime */
574 * Once we dirty a buffer any cached offset becomes invalid.
576 bp->b_bio2.bio_offset = NOOFFSET;
577 bp->b_flags |= B_AGE;
578 if (ap->a_ioflag & IO_SYNC) {
580 } else if ((ap->a_ioflag & IO_DIRECT) && endofblk) {
582 } else if (ap->a_ioflag & IO_ASYNC) {
588 /* hammer2_knote(vp, kflags); */
/*
 * hammer2_vop_nresolve() -- namecache resolution.  Hashes the component
 * name, scans the directory's chain range [lhc, lhc+LOMASK] for an inode
 * whose stored name matches, and binds the resulting vnode (or NULL on
 * miss) into the namecache.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
594 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
596 hammer2_inode_t *dip;
597 hammer2_mount_t *hmp;
598 hammer2_chain_t *parent;
599 hammer2_chain_t *chain;
600 struct namecache *ncp;
607 dip = VTOI(ap->a_dvp);
609 ncp = ap->a_nch->ncp;
611 name_len = ncp->nc_nlen;
612 lhc = hammer2_dirhash(name, name_len);
615 * Note: In DragonFly the kernel handles '.' and '..'.
617 parent = &dip->chain;
618 hammer2_chain_ref(hmp, parent);
619 hammer2_chain_lock(hmp, parent);
620 chain = hammer2_chain_lookup(hmp, &parent,
621 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
/* hash collision possible: verify the actual name bytes */
624 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
626 name_len == chain->data->ipdata.name_len &&
627 bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
630 chain = hammer2_chain_next(hmp, &parent, chain,
631 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
634 hammer2_chain_put(hmp, parent);
637 vp = hammer2_igetv(chain->u.ip, &error);
640 cache_setvp(ap->a_nch, vp);
643 hammer2_chain_put(hmp, chain);
/* negative cache entry on miss */
646 cache_setvp(ap->a_nch, NULL);
/*
 * hammer2_vop_nlookupdotdot() -- resolve '..' to the parent inode's
 * vnode via dip->pip, taking a chain ref+lock around the igetv.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
653 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
655 hammer2_inode_t *dip;
657 hammer2_mount_t *hmp;
660 dip = VTOI(ap->a_dvp);
/* no parent: cannot cross above this directory */
663 if ((ip = dip->pip) == NULL) {
667 hammer2_chain_ref(hmp, &ip->chain);
668 hammer2_chain_lock(hmp, &ip->chain);
669 *ap->a_vpp = hammer2_igetv(ip, &error);
670 hammer2_chain_put(hmp, &ip->chain);
/*
 * hammer2_vop_nmkdir() -- create a directory via hammer2_create_inode(),
 * return its vnode, and resolve the namecache entry.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.  Body is nearly identical to
 * hammer2_vop_ncreate() -- candidate for a shared helper.
 */
677 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
679 hammer2_mount_t *hmp;
680 hammer2_inode_t *dip;
681 hammer2_inode_t *nip;
682 struct namecache *ncp;
687 dip = VTOI(ap->a_dvp);
692 ncp = ap->a_nch->ncp;
694 name_len = ncp->nc_nlen;
696 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
697 dip, name, name_len, &nip);
/* on failure no inode may be returned */
699 KKASSERT(nip == NULL);
703 *ap->a_vpp = hammer2_igetv(nip, &error);
704 hammer2_chain_put(hmp, &nip->chain);
707 cache_setunresolved(ap->a_nch);
708 cache_setvp(ap->a_nch, *ap->a_vpp);
/*
 * hammer2_vop_bmap() -- translate a logical file offset to a physical
 * device offset for cluster_read().  Reads only; writes report ZFOFFSET
 * (zero-fill) / no translation since allocation is delayed.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
714 * Return the largest contiguous physical disk range for the logical
717 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
721 hammer2_vop_bmap(struct vop_bmap_args *ap)
724 hammer2_mount_t *hmp;
726 hammer2_chain_t *parent;
727 hammer2_chain_t *chain;
732 * Only supported on regular files
734 * Only supported for read operations (required for cluster_read).
735 * The block allocation is delayed for write operations.
738 if (vp->v_type != VREG)
740 if (ap->a_cmd != BUF_CMD_READ)
746 loff = ap->a_loffset;
747 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
750 hammer2_chain_ref(hmp, parent);
751 hammer2_chain_lock(hmp, parent);
752 chain = hammer2_chain_lookup(hmp, &parent, loff, loff, 0);
/* translate: key-relative offset plus the chain's media offset */
754 poff = loff - chain->bref.key +
755 (chain->bref.data_off & HAMMER2_OFF_MASK);
756 *ap->a_doffsetp = poff;
757 hammer2_chain_put(hmp, chain);
759 *ap->a_doffsetp = ZFOFFSET; /* zero-fill hole */
761 hammer2_chain_put(hmp, parent);
/*
 * hammer2_vop_open { vp, mode, cred, fp }
 *
 * Nothing filesystem-specific to do on open; delegate to the standard
 * handler.  (Reconstructed from a garbled fragment: stray line numbers
 * removed, braces restored.)
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return vop_stdopen(ap);
}
773 * hammer_vop_advlock { vp, id, op, fl, flags }
775 * MPSAFE - does not require fs_token
779 hammer2_vop_advlock(struct vop_advlock_args *ap)
781 hammer2_inode_t *ip = VTOI(ap->a_vp);
783 return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
/*
 * hammer2_vop_close { vp, fflag }
 *
 * Nothing filesystem-specific to do on close; delegate to the standard
 * handler.  (Reconstructed from a garbled fragment: stray line numbers
 * removed, braces restored.)
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	return vop_stdclose(ap);
}
/*
 * hammer2_vop_ncreate() -- create a regular file.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.  Body is nearly identical to
 * hammer2_vop_nmkdir() -- candidate for a shared helper.
 */
795 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
797 * The operating system has already ensured that the directory entry
798 * does not exist and done all appropriate namespace locking.
802 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
804 hammer2_mount_t *hmp;
805 hammer2_inode_t *dip;
806 hammer2_inode_t *nip;
807 struct namecache *ncp;
812 dip = VTOI(ap->a_dvp);
817 ncp = ap->a_nch->ncp;
819 name_len = ncp->nc_nlen;
821 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
822 dip, name, name_len, &nip);
/* on failure no inode may be returned */
824 KKASSERT(nip == NULL);
828 *ap->a_vpp = hammer2_igetv(nip, &error);
829 hammer2_chain_put(hmp, &nip->chain);
832 cache_setunresolved(ap->a_nch);
833 cache_setvp(ap->a_nch, *ap->a_vpp);
/*
 * Forward declarations for the read/write strategy helpers, followed by
 * hammer2_vop_strategy() which dispatches on the buffer command and
 * flags B_ERROR/EINVAL for unsupported commands.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
838 static int hammer2_strategy_read(struct vop_strategy_args *ap);
839 static int hammer2_strategy_write(struct vop_strategy_args *ap);
843 hammer2_vop_strategy(struct vop_strategy_args *ap)
854 error = hammer2_strategy_read(ap);
857 error = hammer2_strategy_write(ap);
860 bp->b_error = error = EINVAL;
861 bp->b_flags |= B_ERROR;
/*
 * hammer2_strategy_read() -- resolve the logical offset to a device
 * offset (caching it in the pushed bio) and forward the I/O to the
 * backing device, or zero-fill when the lookup finds a hole.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
871 hammer2_strategy_read(struct vop_strategy_args *ap)
876 hammer2_mount_t *hmp;
878 hammer2_chain_t *parent;
879 hammer2_chain_t *chain;
887 nbio = push_bio(bio);
/* translate only when no device offset is cached yet */
889 if (nbio->bio_offset == NOOFFSET) {
890 loff = bio->bio_offset;
891 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
894 hammer2_chain_ref(hmp, parent);
895 hammer2_chain_lock(hmp, parent);
898 * Specifying NOLOCK avoids unnecessary bread()s of the
899 * chain element's content. We just need the block device
902 chain = hammer2_chain_lookup(hmp, &parent, loff, loff,
903 HAMMER2_LOOKUP_NOLOCK);
905 poff = loff - chain->bref.key +
906 (chain->bref.data_off & HAMMER2_OFF_MASK);
907 nbio->bio_offset = poff;
908 hammer2_chain_drop(hmp, chain);
910 nbio->bio_offset = ZFOFFSET;
912 hammer2_chain_put(hmp, parent);
/* hole: satisfy the read with zeros instead of device I/O */
914 if (nbio->bio_offset == ZFOFFSET) {
920 vn_strategy(hmp->devvp, nbio);
/*
 * hammer2_strategy_write() -- copy-on-write style data write: look up
 * the data chain for the block, modifying it in place if it exists or
 * creating a new DATA chain if not, then bcopy the buffer contents in.
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
927 hammer2_strategy_write(struct vop_strategy_args *ap)
932 hammer2_mount_t *hmp;
934 hammer2_chain_t *parent;
935 hammer2_chain_t *chain;
936 hammer2_key_t off_hi;
943 nbio = push_bio(bio);
946 * Our bmap doesn't support writes atm, and a vop_write should
947 * clear the physical disk offset cache for the copy-on-write
950 KKASSERT(nbio->bio_offset == NOOFFSET);
952 off_hi = bio->bio_offset & HAMMER2_OFF_MASK_HI;
953 off_lo = bio->bio_offset & HAMMER2_OFF_MASK_LO;
954 KKASSERT((bio->bio_offset & HAMMER2_LBUFMASK64) == 0);
957 hammer2_chain_ref(hmp, parent);
958 hammer2_chain_lock(hmp, parent);
959 chain = hammer2_chain_lookup(hmp, &parent, off_hi, off_hi, 0);
/* existing block: mark modified (COW) and overwrite in place */
961 hammer2_chain_modify(hmp, chain);
962 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
964 chain = hammer2_chain_create(hmp, parent,
965 off_hi, HAMMER2_PBUFRADIX,
966 HAMMER2_BREF_TYPE_DATA,
968 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
/* a fully filled physical buffer can be flushed immediately */
970 if (off_lo + bp->b_bcount == HAMMER2_PBUFSIZE)
971 atomic_set_int(&chain->flags, HAMMER2_CHAIN_IOFLUSH);
972 hammer2_chain_put(hmp, chain);
973 hammer2_chain_put(hmp, parent);
/*
 * hammer2_vop_mountctl() -- mount control.  Handles
 * MOUNTCTL_SET_EXPORT (NFS export configuration via vfs_export())
 * itself; everything else goes to vop_stdmountctl().
 *
 * NOTE(review): fragment is garbled (fused line numbers, missing lines);
 * code text kept byte-identical.
 */
984 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
987 struct hammer2_mount *hmp;
991 case (MOUNTCTL_SET_EXPORT):
992 mp = ap->a_head.a_ops->head.vv_mount;
/* reject payloads of the wrong size */
995 if (ap->a_ctllen != sizeof(struct export_args))
998 rc = vfs_export(mp, &hmp->export,
999 (const struct export_args *)ap->a_ctl);
1002 rc = vop_stdmountctl(ap);
1008 struct vop_ops hammer2_vnode_vops = {
1009 .vop_default = vop_defaultop,
1010 .vop_fsync = hammer2_vop_fsync,
1011 .vop_getpages = vop_stdgetpages,
1012 .vop_putpages = vop_stdputpages,
1013 .vop_access = hammer2_vop_access,
1014 .vop_advlock = hammer2_vop_advlock,
1015 .vop_close = hammer2_vop_close,
1016 .vop_ncreate = hammer2_vop_ncreate,
1017 .vop_getattr = hammer2_vop_getattr,
1018 .vop_readdir = hammer2_vop_readdir,
1019 .vop_getpages = vop_stdgetpages,
1020 .vop_putpages = vop_stdputpages,
1021 .vop_read = hammer2_vop_read,
1022 .vop_write = hammer2_vop_write,
1023 .vop_open = hammer2_vop_open,
1024 .vop_inactive = hammer2_vop_inactive,
1025 .vop_reclaim = hammer2_vop_reclaim,
1026 .vop_nresolve = hammer2_vop_nresolve,
1027 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
1028 .vop_nmkdir = hammer2_vop_nmkdir,
1029 .vop_mountctl = hammer2_vop_mountctl,
1030 .vop_bmap = hammer2_vop_bmap,
1031 .vop_strategy = hammer2_vop_strategy,
1034 struct vop_ops hammer2_spec_vops = {
1038 struct vop_ops hammer2_fifo_vops = {