2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 #include <sys/dirent.h>
/* Sentinel disk offset meaning "zero-fill hole" (no backing block); see bmap */
49 #define ZFOFFSET (-2LL)
52 * Last reference to a vnode is going away but it is still cached.
/*
 * hammer2_vop_inactive { vp }
 *
 * Called when the last active reference to a still-cached vnode drops.
 * NOTE(review): only the local declarations are visible in this excerpt;
 * the body is elided.
 */
56 hammer2_vop_inactive(struct vop_inactive_args *ap)
59 struct hammer2_inode *ip;
61 struct hammer2_mount *hmp;
79 * Reclaim a vnode so that it can be reused; after the inode is
80 * disassociated, the filesystem must manage it alone.
84 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
86 struct hammer2_inode *ip;
87 struct hammer2_mount *hmp;
/*
 * Flush the inode's chain while holding the exclusive inode lock, then
 * drop the chain reference that was held on behalf of the vnode.
 */
96 hammer2_inode_lock_ex(ip);
99 hammer2_chain_flush(hmp, &ip->chain, NULL);
100 hammer2_inode_unlock_ex(ip);
101 hammer2_chain_drop(hmp, &ip->chain); /* vp ref removed */
104 * XXX handle background sync when ip dirty, kernel will no longer
105 * notify us regarding this inode because there is no longer a
106 * vnode attached to it.
/*
 * hammer2_vop_fsync { vp, waitfor }
 *
 * Synchronize the vnode to media: flush dirty logical buffers via
 * vfsync(), then flush the inode's chain, all under the exclusive
 * inode lock.
 */
114 hammer2_vop_fsync(struct vop_fsync_args *ap)
116 struct hammer2_inode *ip;
117 struct hammer2_mount *hmp;
124 hammer2_inode_lock_ex(ip);
125 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
126 hammer2_chain_flush(hmp, &ip->chain, NULL);
127 hammer2_inode_unlock_ex(ip);
/*
 * hammer2_vop_access { vp, mode, cred }
 *
 * Check access permissions.  The media stores uid/gid as uuids; convert
 * them to unix ids and defer the mode check to the generic helper.
 */
133 hammer2_vop_access(struct vop_access_args *ap)
135 hammer2_inode_t *ip = VTOI(ap->a_vp);
140 uid = hammer2_to_unix_xid(&ip->ip_data.uid);
141 gid = hammer2_to_unix_xid(&ip->ip_data.gid);
143 error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
/*
 * hammer2_vop_getattr { vp, vap }
 *
 * Fill in *vap from the inode's media data while holding a shared
 * inode lock.
 */
150 hammer2_vop_getattr(struct vop_getattr_args *ap)
152 hammer2_mount_t *hmp;
163 hammer2_inode_lock_sh(ip);
165 vap->va_fsid = hmp->mp->mnt_stat.f_fsid.val[0];
166 vap->va_fileid = ip->ip_data.inum;
167 vap->va_mode = ip->ip_data.mode;
168 vap->va_nlink = ip->ip_data.nlinks;
173 vap->va_size = ip->ip_data.size;
174 vap->va_blocksize = HAMMER2_PBUFSIZE;
175 vap->va_flags = ip->ip_data.uflags;
176 hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
177 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
/* atime is reported as mtime -- presumably atime is not tracked; confirm */
178 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
180 vap->va_bytes = vap->va_size;
181 vap->va_type = hammer2_get_vtype(ip);
183 vap->va_uid_uuid = ip->ip_data.uid;
184 vap->va_gid_uuid = ip->ip_data.gid;
185 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
188 hammer2_inode_unlock_sh(ip);
/*
 * hammer2_vop_readdir { vp, uio, cred, eofflag, ncookies, cookies }
 *
 * Emit directory entries into the uio, optionally recording seek
 * cookies for NFS.  Artificial "." and ".." entries are synthesized
 * first, then the real entries are scanned via the chain API.
 */
195 hammer2_vop_readdir(struct vop_readdir_args *ap)
197 hammer2_mount_t *hmp;
199 hammer2_inode_t *xip;
200 hammer2_chain_t *parent;
201 hammer2_chain_t *chain;
215 saveoff = uio->uio_offset;
218 * Set up directory entry cookies if requested
220 if (ap->a_ncookies) {
221 ncookies = uio->uio_resid / 16 + 1;
224 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
232 * Handle artificial entries. To ensure that only positive 64 bit
233 * quantities are returned to userland we always strip off bit 63.
234 * The hash code is designed such that codes 0x0000-0x7FFF are not
235 * used, allowing us to use these codes for articial entries.
237 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
238 * allow '..' to cross the mount point into (e.g.) the super-root.
241 chain = (void *)(intptr_t)-1; /* non-NULL early done means not eof */
/* artificial entry for '.' */
244 r = vop_write_dirent(&error, uio,
246 HAMMER2_DIRHASH_USERMSK,
251 cookies[cookie_index] = saveoff;
254 if (cookie_index == ncookies)
/* artificial entry for '..'; never cross the mount point upward */
258 if (ip->pip == NULL || ip == hmp->iroot)
263 r = vop_write_dirent(&error, uio,
265 HAMMER2_DIRHASH_USERMSK,
270 cookies[cookie_index] = saveoff;
273 if (cookie_index == ncookies)
/*
 * Scan real directory entries.  lkey starts the lookup at the
 * first user-visible hash code at or beyond the saved offset.
 */
277 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
280 hammer2_chain_ref(hmp, parent);
281 error = hammer2_chain_lock(hmp, parent);
283 hammer2_chain_put(hmp, parent);
286 chain = hammer2_chain_lookup(hmp, &parent, lkey, (hammer2_key_t)-1, 0);
288 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
289 dtype = hammer2_get_dtype(chain->u.ip);
290 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
291 r = vop_write_dirent(&error, uio,
292 chain->u.ip->ip_data.inum &
293 HAMMER2_DIRHASH_USERMSK,
294 dtype, chain->u.ip->ip_data.name_len,
295 chain->u.ip->ip_data.filename);
299 cookies[cookie_index] = saveoff;
302 /* XXX chain error */
303 kprintf("bad chain type readdir %d\n",
308 * Keys may not be returned in order so once we have a
309 * placemarker (chain) the scan must allow the full range
310 * or some entries will be missed.
312 chain = hammer2_chain_next(hmp, &parent, chain,
313 0, (hammer2_key_t)-1, 0);
315 saveoff = (chain->bref.key &
316 HAMMER2_DIRHASH_USERMSK) + 1;
318 saveoff = (hammer2_key_t)-1;
320 if (cookie_index == ncookies)
323 hammer2_chain_put(hmp, parent);
325 hammer2_chain_put(hmp, chain);
/* chain == NULL means the scan ran off the end of the directory */
328 *ap->a_eofflag = (chain == NULL);
329 uio->uio_offset = saveoff;
/* on early error with no entries emitted, discard the cookie array */
330 if (error && cookie_index == 0) {
332 kfree(cookies, M_TEMP);
334 *ap->a_cookies = NULL;
338 *ap->a_ncookies = cookie_index;
339 *ap->a_cookies = cookies;
/*
 * hammer2_vop_read { vp, uio, ioflag, cred }
 *
 * Read file data through the buffer cache using cluster_read(),
 * one logical buffer at a time.
 */
347 hammer2_vop_read(struct vop_read_args *ap)
350 hammer2_mount_t *hmp;
359 * Read operations supported on this vnode?
362 if (vp->v_type != VREG)
373 seqcount = ap->a_ioflag >> 16;
374 bigread = (uio->uio_resid > 100 * 1024 * 1024);
/* loop until the uio is exhausted or we reach the file EOF */
379 while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
380 hammer2_key_t off_hi;
/* split the offset into a buffer-aligned base and an in-buffer offset */
384 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
385 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
387 /* XXX bigread & signal check test */
389 error = cluster_read(vp, ip->ip_data.size, off_hi,
390 HAMMER2_LBUFSIZE, HAMMER2_PBUFSIZE,
391 seqcount * BKVASIZE, &bp);
/* clamp the copy length to the buffer, the uio, and the file EOF */
394 n = HAMMER2_LBUFSIZE - off_lo;
395 if (n > uio->uio_resid)
397 if (n > ip->ip_data.size - uio->uio_offset)
398 n = (int)(ip->ip_data.size - uio->uio_offset);
399 bp->b_flags |= B_AGE;
400 uiomove((char *)bp->b_data + off_lo, n, uio);
/*
 * hammer2_vop_write { vp, uio, ioflag, cred }
 *
 * Write file data through the buffer cache one logical buffer at a
 * time, extending the file as needed and choosing the cheapest way
 * to instantiate each buffer (full overwrite, beyond-EOF, or
 * read-modify-write).
 */
408 hammer2_vop_write(struct vop_write_args *ap)
412 hammer2_mount_t *hmp;
422 * Write operations supported on this vnode?
425 if (vp->v_type != VREG)
439 seqcount = ap->a_ioflag >> 16;
440 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
443 * Check resource limit
445 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
446 uio->uio_offset + uio->uio_resid >
447 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
448 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
/* NOTE(review): bigwrite is computed twice (also above); redundant */
452 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
457 while (uio->uio_resid > 0) {
459 hammer2_key_t off_hi;
/* split the offset into a buffer-aligned base and an in-buffer offset */
466 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
467 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
469 n = HAMMER2_LBUFSIZE - off_lo;
470 if (n > uio->uio_resid) {
476 nsize = uio->uio_offset + n;
478 /* XXX bigwrite & signal check test */
481 * Don't allow the buffer build to blow out the buffer
484 if ((ap->a_ioflag & IO_RECURSE) == 0)
485 bwillwrite(HAMMER2_LBUFSIZE);
488 * Extend the size of the file as needed
491 if (nsize > ip->ip_data.size) {
492 if (uio->uio_offset > ip->ip_data.size)
496 nvextendbuf(vp, ip->ip_data.size, nsize,
497 HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE,
498 (int)(ip->ip_data.size & HAMMER2_LBUFMASK),
501 kflags |= NOTE_EXTEND;
/* select how to instantiate the logical buffer being written */
507 if (uio->uio_segflg == UIO_NOCOPY) {
509 * Issuing a write with the same data backing the
510 * buffer. Instantiate the buffer to collect the
511 * backing vm pages, then read-in any missing bits.
513 * This case is used by vop_stdputpages().
515 bp = getblk(vp, off_hi,
516 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
517 if ((bp->b_flags & B_CACHE) == 0) {
519 error = bread(ap->a_vp,
520 off_hi, HAMMER2_LBUFSIZE, &bp);
522 } else if (off_lo == 0 && uio->uio_resid >= HAMMER2_LBUFSIZE) {
524 * Even though we are entirely overwriting the buffer
525 * we may still have to zero it out to avoid a
526 * mmap/write visibility issue.
528 bp = getblk(vp, off_hi,
529 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
530 if ((bp->b_flags & B_CACHE) == 0)
532 } else if (off_hi >= ip->ip_data.size) {
534 * If the base offset of the buffer is beyond the
535 * file EOF, we don't have to issue a read.
537 bp = getblk(vp, off_hi,
538 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
542 * Partial overwrite, read in any missing bits then
543 * replace the portion being written.
545 error = bread(vp, off_hi, HAMMER2_LBUFSIZE, &bp);
/* copy the user data into the instantiated buffer */
552 error = uiomove(bp->b_data + off_lo, n, uio);
559 nvtruncbuf(vp, ip->ip_data.size,
560 HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE);
564 kflags |= NOTE_WRITE;
/* extend the cached file size if the write went past it */
565 if (ip->ip_data.size < uio->uio_offset)
566 ip->ip_data.size = uio->uio_offset;
567 /* XXX update ino_data.mtime */
570 * Once we dirty a buffer any cached offset becomes invalid.
572 bp->b_bio2.bio_offset = NOOFFSET;
573 bp->b_flags |= B_AGE;
/* flush policy depends on ioflag: sync, direct-at-block-end, or async */
574 if (ap->a_ioflag & IO_SYNC) {
576 } else if ((ap->a_ioflag & IO_DIRECT) && endofblk) {
578 } else if (ap->a_ioflag & IO_ASYNC) {
584 /* hammer2_knote(vp, kflags); */
/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a name in directory dvp to a vnode.  Hashes the name,
 * scans the matching hash-collision range in the directory chain,
 * and on a filename match instantiates the vnode and caches it in
 * the namecache.  A miss caches a negative entry.
 */
590 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
592 hammer2_inode_t *dip;
593 hammer2_mount_t *hmp;
594 hammer2_chain_t *parent;
595 hammer2_chain_t *chain;
596 struct namecache *ncp;
603 dip = VTOI(ap->a_dvp);
605 ncp = ap->a_nch->ncp;
607 name_len = ncp->nc_nlen;
608 lhc = hammer2_dirhash(name, name_len);
611 * Note: In DragonFly the kernel handles '.' and '..'.
613 parent = &dip->chain;
614 hammer2_chain_ref(hmp, parent);
615 hammer2_chain_lock(hmp, parent);
/* iterate all entries whose keys collide within the hash range */
616 chain = hammer2_chain_lookup(hmp, &parent,
617 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
620 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
622 name_len == chain->data->ipdata.name_len &&
623 bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
626 chain = hammer2_chain_next(hmp, &parent, chain,
627 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
630 hammer2_chain_put(hmp, parent);
/* found: instantiate the vnode and set a positive namecache entry */
633 vp = hammer2_igetv(chain->u.ip, &error);
636 cache_setvp(ap->a_nch, vp);
639 hammer2_chain_put(hmp, chain);
/* not found: record a negative namecache entry */
642 cache_setvp(ap->a_nch, NULL);
/*
 * hammer2_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Resolve '..' for directory dvp via the in-memory parent inode
 * pointer (dip->pip).  Fails when there is no parent (e.g. at the
 * mount root).
 */
649 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
651 hammer2_inode_t *dip;
653 hammer2_mount_t *hmp;
656 dip = VTOI(ap->a_dvp);
659 if ((ip = dip->pip) == NULL) {
/* hold the parent's chain locked while instantiating its vnode */
663 hammer2_chain_ref(hmp, &ip->chain);
664 hammer2_chain_lock(hmp, &ip->chain);
665 *ap->a_vpp = hammer2_igetv(ip, &error);
666 hammer2_chain_put(hmp, &ip->chain);
/*
 * hammer2_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * Create a directory named by the namecache entry in dvp, returning
 * the new vnode in *vpp and resolving the namecache entry to it.
 */
673 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
675 hammer2_mount_t *hmp;
676 hammer2_inode_t *dip;
677 hammer2_inode_t *nip;
678 struct namecache *ncp;
683 dip = VTOI(ap->a_dvp);
688 ncp = ap->a_nch->ncp;
690 name_len = ncp->nc_nlen;
692 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
693 dip, name, name_len, &nip);
/* on failure the new inode must not have been returned */
695 KKASSERT(nip == NULL);
699 *ap->a_vpp = hammer2_igetv(nip, &error);
700 hammer2_chain_put(hmp, &nip->chain);
/* resolve the namecache entry to the newly created vnode */
703 cache_setunresolved(ap->a_nch);
704 cache_setvp(ap->a_nch, *ap->a_vpp);
710 * Return the largest contiguous physical disk range for the logical
713 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
/*
 * Translate a logical file offset to a physical device offset by
 * looking up the backing chain element.  A missing chain element is
 * reported as ZFOFFSET (a zero-fill hole).
 */
717 hammer2_vop_bmap(struct vop_bmap_args *ap)
720 hammer2_mount_t *hmp;
722 hammer2_chain_t *parent;
723 hammer2_chain_t *chain;
728 * Only supported on regular files
730 * Only supported for read operations (required for cluster_read).
731 * The block allocation is delayed for write operations.
734 if (vp->v_type != VREG)
736 if (ap->a_cmd != BUF_CMD_READ)
742 loff = ap->a_loffset;
/* bmap requests must be logical-buffer aligned */
743 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
746 hammer2_chain_ref(hmp, parent);
747 hammer2_chain_lock(hmp, parent);
748 chain = hammer2_chain_lookup(hmp, &parent, loff, loff, 0);
/* physical offset = in-chain offset plus the chain's media data offset */
750 poff = loff - chain->bref.key +
751 (chain->bref.data_off & HAMMER2_OFF_MASK);
752 *ap->a_doffsetp = poff;
753 hammer2_chain_put(hmp, chain);
755 *ap->a_doffsetp = ZFOFFSET; /* zero-fill hole */
757 hammer2_chain_put(hmp, parent);
/* hammer2_vop_open { vp, mode, cred, fp } -- defer to the standard open */
763 hammer2_vop_open(struct vop_open_args *ap)
765 return vop_stdopen(ap);
/* hammer2_vop_close { vp, fflag } -- defer to the standard close */
770 hammer2_vop_close(struct vop_close_args *ap)
772 return vop_stdclose(ap);
776 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
778 * The operating system has already ensured that the directory entry
779 * does not exist and done all appropriate namespace locking.
783 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
785 hammer2_mount_t *hmp;
786 hammer2_inode_t *dip;
787 hammer2_inode_t *nip;
788 struct namecache *ncp;
793 dip = VTOI(ap->a_dvp);
798 ncp = ap->a_nch->ncp;
800 name_len = ncp->nc_nlen;
/* create the new inode under dvp with the name from the namecache */
802 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
803 dip, name, name_len, &nip);
/* on failure the new inode must not have been returned */
805 KKASSERT(nip == NULL);
809 *ap->a_vpp = hammer2_igetv(nip, &error);
810 hammer2_chain_put(hmp, &nip->chain);
/* resolve the namecache entry to the newly created vnode */
813 cache_setunresolved(ap->a_nch);
814 cache_setvp(ap->a_nch, *ap->a_vpp);
819 static int hammer2_strategy_read(struct vop_strategy_args *ap);
820 static int hammer2_strategy_write(struct vop_strategy_args *ap);
/*
 * hammer2_vop_strategy { vp, bio }
 *
 * Dispatch the buffer command to the read or write strategy helper;
 * any other command fails the bio with EINVAL.
 */
824 hammer2_vop_strategy(struct vop_strategy_args *ap)
835 error = hammer2_strategy_read(ap);
838 error = hammer2_strategy_write(ap);
841 bp->b_error = error = EINVAL;
842 bp->b_flags |= B_ERROR;
/*
 * Read strategy: translate the logical bio offset to a device offset
 * (caching it in the pushed bio) and issue the I/O to the backing
 * device.  A hole translates to ZFOFFSET and is zero-filled instead.
 */
852 hammer2_strategy_read(struct vop_strategy_args *ap)
857 hammer2_mount_t *hmp;
859 hammer2_chain_t *parent;
860 hammer2_chain_t *chain;
868 nbio = push_bio(bio);
/* only translate when no device offset has been cached yet */
870 if (nbio->bio_offset == NOOFFSET) {
871 loff = bio->bio_offset;
872 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
875 hammer2_chain_ref(hmp, parent);
876 hammer2_chain_lock(hmp, parent);
879 * Specifying NOLOCK avoids unnecessary bread()s of the
880 * chain element's content. We just need the block device
883 chain = hammer2_chain_lookup(hmp, &parent, loff, loff,
884 HAMMER2_LOOKUP_NOLOCK);
886 poff = loff - chain->bref.key +
887 (chain->bref.data_off & HAMMER2_OFF_MASK);
888 nbio->bio_offset = poff;
889 hammer2_chain_drop(hmp, chain);
891 nbio->bio_offset = ZFOFFSET;
893 hammer2_chain_put(hmp, parent);
/* a hole never touches the device; otherwise hand the bio to it */
895 if (nbio->bio_offset == ZFOFFSET) {
901 vn_strategy(hmp->devvp, nbio);
/*
 * Write strategy: copy the buffer's data into the chain element
 * backing the logical offset, creating a new DATA chain element when
 * none exists yet (copy-on-write allocation happens here).
 */
908 hammer2_strategy_write(struct vop_strategy_args *ap)
913 hammer2_mount_t *hmp;
915 hammer2_chain_t *parent;
916 hammer2_chain_t *chain;
917 hammer2_key_t off_hi;
924 nbio = push_bio(bio);
927 * Our bmap doesn't support writes atm, and a vop_write should
928 * clear the physical disk offset cache for the copy-on-write
931 KKASSERT(nbio->bio_offset == NOOFFSET);
/* split the bio offset into the chain key and the in-block offset */
933 off_hi = bio->bio_offset & HAMMER2_OFF_MASK_HI;
934 off_lo = bio->bio_offset & HAMMER2_OFF_MASK_LO;
935 KKASSERT((bio->bio_offset & HAMMER2_LBUFMASK64) == 0);
938 hammer2_chain_ref(hmp, parent);
939 hammer2_chain_lock(hmp, parent);
940 chain = hammer2_chain_lookup(hmp, &parent, off_hi, off_hi, 0);
/* existing element: mark modified and overwrite in place */
942 hammer2_chain_modify(hmp, chain);
943 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
944 hammer2_chain_put(hmp, chain);
/* no element yet: create a DATA chain element and fill it */
946 chain = hammer2_chain_create(hmp, parent,
947 off_hi, HAMMER2_PBUFRADIX,
948 HAMMER2_BREF_TYPE_DATA,
950 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
951 hammer2_chain_put(hmp, chain);
953 hammer2_chain_put(hmp, parent);
/*
 * hammer2_vop_mountctl { op, ctl, ctllen, ... }
 *
 * Handle mount control operations; MOUNTCTL_SET_EXPORT updates the
 * NFS export configuration, everything else falls through to the
 * standard handler.
 */
964 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
967 struct hammer2_mount *hmp;
971 case (MOUNTCTL_SET_EXPORT):
972 mp = ap->a_head.a_ops->head.vv_mount;
/* reject malformed export_args payloads */
975 if (ap->a_ctllen != sizeof(struct export_args))
978 rc = vfs_export(mp, &hmp->export,
979 (const struct export_args *)ap->a_ctl);
982 rc = vop_stdmountctl(ap);
/*
 * Vnode operations vector for regular HAMMER2 vnodes.
 *
 * NOTE(review): .vop_getpages and .vop_putpages are each initialized
 * twice in this table; with designated initializers the later entry
 * wins (both pairs are identical here), but the duplicates should be
 * removed.
 */
988 struct vop_ops hammer2_vnode_vops = {
989 .vop_default = vop_defaultop,
990 .vop_fsync = hammer2_vop_fsync,
991 .vop_getpages = vop_stdgetpages,
992 .vop_putpages = vop_stdputpages,
993 .vop_access = hammer2_vop_access,
994 .vop_close = hammer2_vop_close,
995 .vop_ncreate = hammer2_vop_ncreate,
996 .vop_getattr = hammer2_vop_getattr,
997 .vop_readdir = hammer2_vop_readdir,
998 .vop_getpages = vop_stdgetpages,
999 .vop_putpages = vop_stdputpages,
1000 .vop_read = hammer2_vop_read,
1001 .vop_write = hammer2_vop_write,
1002 .vop_open = hammer2_vop_open,
1003 .vop_inactive = hammer2_vop_inactive,
1004 .vop_reclaim = hammer2_vop_reclaim,
1005 .vop_nresolve = hammer2_vop_nresolve,
1006 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
1007 .vop_nmkdir = hammer2_vop_nmkdir,
1008 .vop_mountctl = hammer2_vop_mountctl,
1009 .vop_bmap = hammer2_vop_bmap,
1010 .vop_strategy = hammer2_vop_strategy,
1013 struct vop_ops hammer2_spec_vops = {
1017 struct vop_ops hammer2_fifo_vops = {