2 * Copyright (c) 2013 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
6 * by Matthew Dillon <dillon@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 #include <sys/mount.h>
42 #include <sys/queue.h>
43 #include <sys/spinlock2.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/vfscache.h>
48 #include <sys/vnode.h>
53 * Allocate and set up everything needed for the dirfs node to hold the filename.
54 * Note: dn_name is NUL-terminated.
/*
 * Replace the node's name with a private copy of the first `len` bytes of
 * `name`, and record that length in dn_namelen.
 */
57 dirfs_node_setname(dirfs_node_t dnp, const char *name, int len)
/*
 * Release the previous name buffer.
 * NOTE(review): any guard around this kfree() is not visible in this
 * excerpt -- confirm it is skipped while dn_name is still NULL.
 */
62 kfree(dnp->dn_name, M_DIRFS_MISC);
/* One extra byte is allocated for the terminator stored below. */
63 dnp->dn_name = kmalloc(len + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
64 bcopy(name, dnp->dn_name, len);
65 dnp->dn_name[len] = 0;
66 dnp->dn_namelen = len;
70 * Allocate enough space to hold a dirfs node structure.
71 * Note: Node name and length aren't handled here.
/*
 * Allocate a dirfs node, initialise its lock and mark it as having no host
 * file descriptor. The `mp` argument is not used by the lines visible in
 * this excerpt.
 */
74 dirfs_node_alloc(struct mount *mp)
/* M_ZERO is requested, so fields not assigned here presumably start at 0. */
80 dnp = kmalloc(sizeof(*dnp), M_DIRFS_NODE, M_WAITOK | M_ZERO);
/* The node lock is created with LK_CANRECURSE. */
81 lockinit(&dnp->dn_lock, "dfsnode", 0, LK_CANRECURSE);
/* A fresh node has no descriptor yet. */
83 dnp->dn_fd = DIRFS_NOFD;
89 * Drops a reference to the node. The node is freed when the last reference is dropped.
/*
 * Release one reference on `dnp`. When dirfs_node_unref() reports a
 * non-zero result the node is torn down with dirfs_node_free().
 */
92 dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp)
96 if (dirfs_node_unref(dnp))
97 dirfs_node_free(dmp, dnp);
101 * Removes the association with its parent. Before freeing up its resources
102 * the node will be removed from the per-mount passive fd cache and its fd
103 * will be closed, either normally or forced.
/*
 * Destroy a node that no longer has any references: detach it from its
 * parent, clear its passive-fd state, release its name buffer and lock,
 * and free the node itself. The work between dirfs_mount_gettoken() and
 * dirfs_mount_reltoken() is bracketed by the per-mount token.
 */
106 dirfs_node_free(dirfs_mount_t dmp, dirfs_node_t dnp)
112 KKASSERT(dnp != NULL);
/* Callers may only get here once the reference count has reached zero. */
115 KKASSERT(dirfs_node_refcnt(dnp) == 0);
117 vp = NODE_TO_VP(dnp);
119 * Remove the inode from the passive fds list
120 * as we are tearing down the node.
121 * Root inode will be removed on VOP_UNMOUNT()
123 dirfs_mount_gettoken(dmp);
125 if (dnp->dn_parent) { /* NULL when children reaped parents */
/*
 * Give back the reference this child held on its parent; through
 * dirfs_node_drop() this can end up freeing the parent as well.
 */
126 dirfs_node_drop(dmp, dnp->dn_parent);
127 dnp->dn_parent = NULL;
129 dirfs_node_setpassive(dmp, dnp, 0);
131 kfree(dnp->dn_name, M_DIRFS_MISC);
136 * The file descriptor should have been closed already by the
137 * previous call to dirfs_set-passive. If not, force a sync and
140 if (dnp->dn_fd != DIRFS_NOFD) {
/*
 * NOTE(review): `vp` comes from NODE_TO_VP(dnp) above and no NULL check
 * is visible before it is handed to VOP_FSYNC() -- confirm a node can
 * never reach this point without a vnode.
 */
142 VOP_FSYNC(vp, MNT_WAIT, 0);
/* The call that closes the descriptor is not visible in this excerpt. */
144 dnp->dn_fd = DIRFS_NOFD;
147 lockuninit(&dnp->dn_lock);
148 kfree(dnp, M_DIRFS_NODE);
151 dirfs_mount_reltoken(dmp);
157 * Do all the operations needed to get a resulting inode <--> host file
158 * association. This may or may not include opening the file, which should be
159 * only needed when creating it.
161 * In the case vap is not NULL and openflags are specified, open the file.
/*
 * Create a dirfs node for the namecache entry `ncp` under parent `pdnp`,
 * optionally open/create the host file, load its attributes with
 * dirfs_node_stat() and attach a vnode to it.
 */
164 dirfs_alloc_file(dirfs_mount_t dmp, dirfs_node_t *dnpp, dirfs_node_t pdnp,
165 struct namecache *ncp, struct vnode **vpp, struct vattr *vap,
180 mp = DIRFS_TO_VFS(dmp);
186 dnp = dirfs_node_alloc(mp);
187 KKASSERT(dnp != NULL);
/* Name the node and link it to its parent while holding the node lock. */
189 dirfs_node_lock(dnp);
190 dirfs_node_setname(dnp, ncp->nc_name, ncp->nc_nlen);
191 dnp->dn_parent = pdnp;
192 dirfs_node_ref(pdnp); /* Children ref */
193 dirfs_node_unlock(dnp);
/*
 * pathnp is a node with a usable descriptor and tmp the path of dnp
 * relative to it; both are released later with dirfs_dropfd().
 * NOTE(review): the visible code dereferences pathnp without a NULL
 * check -- confirm what dirfs_findfd() returns on its "too long" path.
 */
195 pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
/* Only open (and possibly create) the host file when asked to. */
197 if (openflags && vap != NULL) {
198 dnp->dn_fd = openat(pathnp->dn_fd, tmp,
199 openflags, vap->va_mode);
200 if (dnp->dn_fd == -1) {
201 dirfs_dropfd(dmp, pathnp, pathfree);
206 error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
207 if (error) { /* XXX Handle errors */
/*
 * NOTE(review): dirfs_alloc_vp() has not run yet on this path, and
 * dirfs_free_vp() finishes with dirfs_node_drop(), which may already
 * free dnp; calling dirfs_node_free() right after looks like a possible
 * double free (it also asserts a zero refcount) -- confirm.
 */
210 dirfs_free_vp(dmp, dnp);
211 dirfs_node_free(dmp, dnp);
212 dirfs_dropfd(dmp, pathnp, pathfree);
216 dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
220 dbg(9, "tmp=%s dnp=%p allocated\n", tmp, dnp);
221 dirfs_dropfd(dmp, pathnp, pathfree);
223 /* We want VOP_INACTIVE() to be called on last ref */
224 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
230 * Requires a dirfs_node_t that has already been lstat(2)'ed
231 * for the type comparison.
/*
 * Produce a vnode for `dnp`: reuse the one already attached when vget()
 * succeeds on it, otherwise allocate a new vnode and bind it to the node.
 * The loop construct that implements the "retry" cases mentioned below is
 * not visible in this excerpt.
 */
234 dirfs_alloc_vp(struct mount *mp, struct vnode **vpp, int lkflags,
238 dirfs_mount_t dmp = VFS_TO_DIRFS(mp);
243 * Handle vnode reclaim/alloc races
/* A zero return from vget() means the existing vnode could be taken. */
248 if (vget(vp, LK_EXCLUSIVE) == 0)
250 /* vget raced a reclaim, retry */
252 getnewvnode(VT_UNUSED10, mp, &vp, 0, lkflags);
/*
 * Re-check dn_vnode after the allocation: presumably another caller may
 * have attached a vnode in the meantime (see the "raced" note below).
 */
253 if (dnp->dn_vnode == NULL) {
/* The vnode type is taken from the type cached on the node. */
256 vp->v_type = dnp->dn_type;
257 if (dmp->dm_root == dnp)
258 vsetflags(vp, VROOT);
259 dirfs_node_ref(dnp); /* ref for dnp<->vp */
261 /* Type-specific initialization. */
262 switch (dnp->dn_type) {
268 vinitvmio(vp, dnp->dn_size, BMASK, -1);
273 // vp->v_ops = &mp->mnt_vn_fifo_ops;
/* Types not handled by the switch are treated as fatal. */
278 panic("dirfs_alloc_vp: dnp=%p vp=%p "
280 dnp, vp, dnp->dn_type);
288 /* multiple dirfs_alloc_vp calls raced, retry */
291 KKASSERT(vp != NULL);
293 dbg(9, "dnp=%p vp=%p type=%d\n", dnp, vp, vp->v_type);
297 * Do not call locked!
/*
 * Break the node <-> vnode association and give back a node reference
 * (presumably the one taken for that association in dirfs_alloc_vp()).
 */
300 dirfs_free_vp(dirfs_mount_t dmp, dirfs_node_t dnp)
302 struct vnode *vp = NODE_TO_VP(dnp);
306 dnp->dn_vnode = NULL;
/* May free dnp if this was the last reference; do not use dnp afterwards. */
308 dirfs_node_drop(dmp, dnp);
/*
 * Classify st->st_mode with the S_IS*() predicates. The first branch and
 * the value selected by each branch are on lines not visible in this
 * excerpt.
 */
312 dirfs_nodetype(struct stat *st)
315 mode_t mode = st->st_mode;
319 else if (S_ISBLK(mode))
321 else if (S_ISCHR(mode))
323 else if (S_ISFIFO(mode))
325 else if (S_ISSOCK(mode))
327 else if (S_ISLNK(mode))
329 else if (S_ISREG(mode))
/*
 * Refresh the attributes cached in `dnp` from the host file named by
 * `path`. With fd == DIRFS_NOFD the path is passed to lstat(); otherwise
 * it is resolved relative to `fd` with fstatat(AT_SYMLINK_NOFOLLOW).
 * Handling of a failed stat call is not visible in this excerpt.
 */
338 dirfs_node_stat(int fd, const char *path, dirfs_node_t dnp)
344 if (fd == DIRFS_NOFD)
345 error = lstat(path, &st);
347 error = fstatat(fd, path, &st, AT_SYMLINK_NOFOLLOW);
352 /* Populate our dirfs node struct with stat data */
353 dnp->dn_uid = st.st_uid;
354 dnp->dn_gid = st.st_gid;
355 dnp->dn_mode = st.st_mode;
356 dnp->dn_flags = st.st_flags;
357 dnp->dn_links = st.st_nlink;
358 dnp->dn_atime = st.st_atime;
/*
 * NOTE(review): the three *nsec fields below are computed as the seconds
 * value multiplied by 1e9 instead of being read from the nanosecond part
 * of the timestamp. This looks like a bug (and the product can overflow);
 * confirm against the timespec members of struct stat.
 */
359 dnp->dn_atimensec = (st.st_atime * 1000000000L);
360 dnp->dn_mtime = st.st_mtime;
361 dnp->dn_mtimensec = (st.st_mtime * 1000000000L);
362 dnp->dn_ctime = st.st_ctime;
363 dnp->dn_ctimensec = (st.st_ctime * 1000000000L);
364 dnp->dn_gen = st.st_gen;
365 dnp->dn_ino = st.st_ino;
366 dnp->dn_st_dev = st.st_dev;
367 dnp->dn_size = st.st_size;
/* The node type is derived from st_mode by dirfs_nodetype(). */
368 dnp->dn_type = dirfs_nodetype(&st);
/*
 * Convenience wrapper: same as dirfs_node_absolute_path_plus() with no
 * trailing element (last == NULL).
 */
374 dirfs_node_absolute_path(dirfs_mount_t dmp, dirfs_node_t cur, char **pathfreep)
376 return(dirfs_node_absolute_path_plus(dmp, cur, NULL, pathfreep));
/*
 * Build a path for `cur` (optionally followed by `last`) prefixed with the
 * mount's dm_path. The path is assembled backwards at the tail of a
 * MAXPATHLEN + 1 byte buffer: `count` is the number of bytes used so far
 * and every piece is written at buf[MAXPATHLEN - count]. On success a
 * pointer into that buffer is returned; when the path does not fit, or
 * the walk ended on a NULL node, the buffer is freed instead.
 */
380 dirfs_node_absolute_path_plus(dirfs_mount_t dmp, dirfs_node_t cur,
381 char *last, char **pathfreep)
390 KKASSERT(dmp->dm_root); /* Sanity check */
/*
 * No M_ZERO here; the terminator is presumably stored on a line not
 * visible in this excerpt -- TODO confirm.
 */
394 buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK);
397 * Passed-in trailing element.
/* Pieces are only copied while the running total still fits. */
404 if (count <= MAXPATHLEN)
405 bcopy(last, &buf[MAXPATHLEN - count], len);
407 if (count <= MAXPATHLEN)
408 buf[MAXPATHLEN - count] = '/';
412 * Iterate through the parents until we hit the root.
415 while (dirfs_node_isroot(dnp1) == 0) {
416 count += dnp1->dn_namelen;
417 if (count <= MAXPATHLEN) {
418 bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
422 if (count <= MAXPATHLEN)
423 buf[MAXPATHLEN - count] = '/';
424 dnp1 = dnp1->dn_parent;
430 * Prefix with the root mount path. If the element was unlinked
431 * dnp1 will be NULL and there is no path.
433 len = strlen(dmp->dm_path);
435 if (dnp1 && count <= MAXPATHLEN) {
436 bcopy(dmp->dm_path, &buf[MAXPATHLEN - count], len);
438 dbg(9, "absolute_path %s\n", &buf[MAXPATHLEN - count]);
/*
 * The result points into the middle of buf, so it cannot be passed to
 * kfree() directly; presumably *pathfreep carries the buffer start for
 * the caller to release -- confirm.
 */
439 return (&buf[MAXPATHLEN - count]);
441 kfree(buf, M_DIRFS_MISC);
448 * Return a dirfs_node with a valid descriptor plus an allocated
449 * relative path which can be used in openat(), fstatat(), etc calls
450 * to locate the requested inode.
/*
 * Walk up from `cur` through dn_parent until a node other than `cur` with
 * an open descriptor is found, building on the way the path of `cur`
 * relative to that node. The path is assembled backwards at the tail of a
 * zeroed MAXPATHLEN + 1 byte buffer and handed back through *pathto.
 */
453 dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur,
454 char **pathto, char **pathfreep)
468 buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
/* `cur` itself is always skipped, even if it already has a descriptor. */
472 while (dnp1 == cur || dnp1->dn_fd == DIRFS_NOFD) {
473 count += dnp1->dn_namelen;
474 if (count <= MAXPATHLEN) {
475 bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
479 if (count <= MAXPATHLEN)
480 buf[MAXPATHLEN - count] = '/';
481 dnp1 = dnp1->dn_parent;
482 KKASSERT(dnp1 != NULL);
485 if (dnp1 && count <= MAXPATHLEN) {
487 *pathto = &buf[MAXPATHLEN - count + 1]; /* skip '/' prefix */
/* The returned node is referenced; dirfs_dropfd() releases it. */
488 dirfs_node_ref(dnp1);
/*
 * NOTE(review): "dnp1->dn_name=%d" is given dn_name, which is used as a
 * character buffer elsewhere in this file; "%s" is probably intended.
 */
489 dbg(9, "fd=%d dnp1=%p dnp1->dn_name=%d &buf[off]=%s\n",
490 dnp1->dn_fd, dnp1, dnp1->dn_name, *pathto);
492 dbg(9, "failed too long\n");
493 kfree(buf, M_DIRFS_MISC);
/*
 * Release what a path lookup handed out: free the path buffer and drop the
 * reference on `dnp1`. Callers in this file also pass dnp1 == NULL, so
 * both steps are presumably guarded on lines not visible here -- confirm.
 */
502 dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree)
505 kfree(pathfree, M_DIRFS_MISC);
507 dirfs_node_drop(dmp, dnp1);
/*
 * Translate dnp->dn_mode into DIRFS_NODE_RD/WR/EXE bits OR-ed into *flags.
 * Three groups of mode bits exist (user, group, other); the group branch
 * is selected by `isgroup`, and the user branch presumably by `isowner`
 * (its condition is not visible). Both compare the mount's dm_uid/dm_gid
 * with the node's uid/gid. Whether *flags is cleared first is not visible
 * in this excerpt.
 */
511 dirfs_node_getperms(dirfs_node_t dnp, int *flags)
514 struct vnode *vp = dnp->dn_vnode;
519 * There must be an active vnode anyways since that
520 * would indicate the dirfs node has valid data for
521 * for dnp->dn_mode (via lstat syscall).
/* vp is dereferenced here, hence the vnode requirement stated above. */
524 dmp = VFS_TO_DIRFS(vp->v_mount);
526 isowner = (dmp->dm_uid == dnp->dn_uid);
527 isgroup = (dmp->dm_gid == dnp->dn_gid);
/* User permission bits. */
530 if (dnp->dn_mode & S_IRUSR)
531 *flags |= DIRFS_NODE_RD;
532 if (dnp->dn_mode & S_IWUSR)
533 *flags |= DIRFS_NODE_WR;
534 if (dnp->dn_mode & S_IXUSR)
535 *flags |= DIRFS_NODE_EXE;
536 } else if (isgroup) {
/* Group permission bits. */
537 if (dnp->dn_mode & S_IRGRP)
538 *flags |= DIRFS_NODE_RD;
539 if (dnp->dn_mode & S_IWGRP)
540 *flags |= DIRFS_NODE_WR;
541 if (dnp->dn_mode & S_IXGRP)
542 *flags |= DIRFS_NODE_EXE;
/* "Other" permission bits. */
544 if (dnp->dn_mode & S_IROTH)
545 *flags |= DIRFS_NODE_RD;
546 if (dnp->dn_mode & S_IWOTH)
547 *flags |= DIRFS_NODE_WR;
548 if (dnp->dn_mode & S_IXOTH)
549 *flags |= DIRFS_NODE_EXE;
556 * This requires an allocated node and vnode, otherwise it'll panic
/*
 * Open a host descriptor for `dnp` with openat(), relative to `parentfd`.
 * When no parent descriptor is supplied one is located with
 * dirfs_findfd(). The open flags are derived from the node type and from
 * the permissions reported by dirfs_node_getperms().
 */
559 dirfs_open_helper(dirfs_mount_t dmp, dirfs_node_t dnp, int parentfd,
571 flags = error = perms = 0;
575 KKASSERT(dnp->dn_vnode);
578 * XXX Besides VDIR and VREG there are other file
580 * Also, O_RDWR alone might not be the best mode to open
581 * a file with, need to investigate which suits better.
583 dirfs_node_getperms(dnp, &perms);
/*
 * NOTE(review): dn_type is assigned to vp->v_type and switched on like an
 * enumeration elsewhere in this file, so the bitwise test below probably
 * wants to be an equality test; as written other type values sharing
 * bits with VDIR would also match -- confirm.
 */
585 if (dnp->dn_type & VDIR) {
586 flags |= O_DIRECTORY;
588 if (perms & DIRFS_NODE_WR)
/* A caller-supplied relative path requires a caller-supplied parent fd. */
593 if (relpath != NULL) {
596 KKASSERT(parentfd != DIRFS_NOFD);
597 } else if (parentfd == DIRFS_NOFD) {
598 pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
599 parentfd = pathnp->dn_fd;
604 dnp->dn_fd = openat(parentfd, tmp, flags);
605 if (dnp->dn_fd == -1)
608 dbg(9, "dnp=%p tmp2=%s parentfd=%d flags=%d error=%d "
609 "flags=%08x w=%d x=%d\n", dnp, tmp, parentfd, flags, error,
/* Releases the path buffer and the reference from dirfs_findfd(). */
613 dirfs_dropfd(dmp, pathnp, pathfree);
/*
 * Close the node's host descriptor, if it has one, and mark the node as
 * having none. The result of close() is stored in `error`; what happens
 * to it afterwards is not visible in this excerpt.
 */
619 dirfs_close_helper(dirfs_node_t dnp)
626 if (dnp->dn_fd != DIRFS_NOFD) {
627 dbg(9, "closed fd on dnp=%p\n", dnp);
629 /* buffer cache buffers may still be present */
630 error = close(dnp->dn_fd); /* XXX EINTR should be checked */
/* The descriptor is forgotten regardless of the close() result. */
631 dnp->dn_fd = DIRFS_NOFD;
/* Return the node's current reference count (a plain read of dn_refcnt). */
639 dirfs_node_refcnt(dirfs_node_t dnp)
641 return dnp->dn_refcnt;
/*
 * Update the timestamps of the host file behind `dnp` via lutimes() with
 * a NULL times argument, then reload the node's cached attributes. The
 * vnode must be locked by the caller (asserted on entry and exit).
 */
645 dirfs_node_chtimes(dirfs_node_t dnp)
653 vp = NODE_TO_VP(dnp);
654 dmp = VFS_TO_DIRFS(vp->v_mount);
656 KKASSERT(vn_islocked(vp));
/* Flag check; the action taken when it matches is not visible here. */
658 if (dnp->dn_flags & (IMMUTABLE | APPEND))
661 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
663 if((lutimes(tmp, NULL)) == -1)
/* Re-read the host attributes so the cached times match the file. */
666 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
/* No node reference to drop here (NULL); only the path buffer is released. */
667 dirfs_dropfd(dmp, NULL, pathfree);
669 KKASSERT(vn_islocked(vp));
/*
 * Change the file flags of the host file behind `dnp`. The new flag word
 * is computed by vop_helper_setattr_flags() from the current dn_flags and
 * `vaflags`, applied with lchflags(), and the cached attributes are then
 * reloaded. The vnode must be locked by the caller.
 */
676 dirfs_node_chflags(dirfs_node_t dnp, int vaflags, struct ucred *cred)
685 vp = NODE_TO_VP(dnp);
686 dmp = VFS_TO_DIRFS(vp->v_mount);
688 KKASSERT(vn_islocked(vp));
/* Work on a copy; dn_flags itself is refreshed by the stat call below. */
690 flags = dnp->dn_flags;
692 error = vop_helper_setattr_flags(&flags, vaflags, dnp->dn_uid, cred);
694 * When running vkernels with non-root it is not possible to set
695 * certain flags on host files, such as SF* flags. chflags(2) call
696 * will spit an error in that case.
699 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
701 if((lchflags(tmp, flags)) == -1)
703 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
704 dirfs_dropfd(dmp, NULL, pathfree);
707 KKASSERT(vn_islocked(vp));
/*
 * Apply `mode` to the host file behind `dnp` with lchmod() and reload the
 * node's cached attributes. Error handling for a failed lchmod() is not
 * visible in this excerpt.
 */
713 dirfs_node_chmod(dirfs_mount_t dmp, dirfs_node_t dnp, mode_t mode)
719 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
721 if (lchmod(tmp, mode) < 0)
723 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
724 dirfs_dropfd(dmp, NULL, pathfree);
/*
 * Change owner and group of the host file behind `dnp` with lchown() and
 * reload the node's cached attributes. A differing `mode` triggers an
 * extra step whose code is not visible in this excerpt.
 * NOTE(review): `gid` is declared as uid_t; gid_t is probably intended.
 */
730 dirfs_node_chown(dirfs_mount_t dmp, dirfs_node_t dnp,
731 uid_t uid, uid_t gid, mode_t mode)
737 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
739 if (lchown(tmp, uid, gid) < 0)
741 if (mode != dnp->dn_mode)
743 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
744 dirfs_dropfd(dmp, NULL, pathfree);
/*
 * Resize the file behind `dnp` to `nsize`: adjust the vnode's buffers
 * (nvtruncbuf() or nvextendbuf(); the condition choosing between them is
 * not visible here), truncate the host file, and record the new size on
 * the node. The vnode must be locked by the caller.
 */
751 dirfs_node_chsize(dirfs_node_t dnp, off_t nsize)
763 vp = NODE_TO_VP(dnp);
764 dmp = VFS_TO_DIRFS(vp->v_mount);
/* Remember the old size; it is needed when extending. */
766 osize = dnp->dn_size;
768 KKASSERT(vn_islocked(vp));
770 switch (vp->v_type) {
780 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
782 error = nvtruncbuf(vp, nsize, biosize, -1, 0);
784 error = nvextendbuf(vp, osize, nsize,
/* The host file is only touched when the buffer operation succeeded. */
788 if (error == 0 && truncate(tmp, nsize) < 0)
791 dnp->dn_size = nsize;
/*
 * NOTE(review): dn_size was just set to nsize, so both values printed
 * here are the same (osize may have been intended), and the second
 * argument is passed to %jx without the intmax_t cast -- confirm.
 */
792 dbg(9, "TRUNCATE %016jx %016jx\n", (intmax_t)nsize, dnp->dn_size);
793 /*dirfs_node_stat(DIRFS_NOFD, tmp, dnp); don't need to do this*/
795 dirfs_dropfd(dmp, NULL, pathfree);
798 KKASSERT(vn_islocked(vp));
/*
 * Move a node into (state != 0) or out of (state == 0) the per-mount
 * passive fd list.
 *
 * Entering: only nodes that have a descriptor and are not yet flagged
 * DIRFS_PASVFD are appended; afterwards nodes are evicted from the head
 * of the list while the mount is over dirfs_fd_limit.
 *
 * Leaving: the flag is cleared, the node is unlinked from the list, its
 * descriptor is closed when nobody else can be using it, and a node
 * reference is dropped.
 */
804 dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state)
808 dbg(5, "dnp=%p state=%d dnp->dn_fd=%d\n", dnp, state, dnp->dn_fd);
810 if (state && (dnp->dn_state & DIRFS_PASVFD) == 0 &&
811 dnp->dn_fd != DIRFS_NOFD) {
813 dirfs_node_setflags(dnp, DIRFS_PASVFD);
814 TAILQ_INSERT_TAIL(&dmp->dm_fdlist, dnp, dn_fdentry);
819 * If we are over our limit remove nodes from the
/*
 * `dnp` is reused for the evicted node. That is harmless for the test
 * further down, because it checks state == 0 first and state is non-zero
 * on this path.
 */
822 while (dmp->dm_fd_used > dirfs_fd_limit) {
823 dnp = TAILQ_FIRST(&dmp->dm_fdlist);
824 dirfs_node_setpassive(dmp, dnp, 0);
827 if (state == 0 && (dnp->dn_state & DIRFS_PASVFD)) {
828 dirfs_node_clrflags(dnp, DIRFS_PASVFD);
829 TAILQ_REMOVE(&dmp->dm_fdlist, dnp, dn_fdentry);
/*
 * NOTE(review): this message prints dirfs_fd_used while the limit check
 * above uses dmp->dm_fd_used -- confirm which counter is meant.
 */
832 dbg(5, "dnp=%p removed from fdlist. %d used refs=%d\n",
833 dnp, dirfs_fd_used, dirfs_node_refcnt(dnp));
836 * Attempt to close the descriptor. We can only do this
837 * if the related vnode is inactive and has exactly two
838 * refs (representing the vp<->dnp and PASVFD). Otherwise
839 * someone might have ref'd the node in order to use the
842 * Also, if the vnode is in any way dirty we leave the fd
843 * open for the buffer cache code. The syncer will eventually
844 * come along and fsync the vnode, and the next inactive
845 * transition will deal with the descriptor.
847 * The descriptor for the root node is NEVER closed by
/* Case 1: node still has a vnode (the assignment of vp is not visible). */
851 if (dirfs_node_refcnt(dnp) == 2 && vp &&
852 dnp->dn_fd != DIRFS_NOFD &&
853 !dirfs_node_isroot(dnp) &&
854 (vp->v_flag & (VINACTIVE|VOBJDIRTY)) == VINACTIVE &&
855 RB_EMPTY(&vp->v_rbdirty_tree)) {
856 dbg(9, "passive cache: closing %d\n", dnp->dn_fd);
858 dnp->dn_fd = DIRFS_NOFD;
/* Case 2: no vnode attached and only one reference left. */
860 if (dirfs_node_refcnt(dnp) == 1 && dnp->dn_vnode == NULL &&
861 dnp->dn_fd != DIRFS_NOFD &&
862 dnp != dmp->dm_root) {
863 dbg(9, "passive cache: closing %d\n", dnp->dn_fd);
865 dnp->dn_fd = DIRFS_NOFD;
/* May free the node if this was its last reference. */
868 dirfs_node_drop(dmp, dnp);
/*
 * Format the node's state flags as text for debugging. Only the
 * DIRFS_PASVFD case is visible in this excerpt.
 * NOTE(review): the text is built in a function-level static buffer, so
 * concurrent or nested callers would share it, and ksprintf() is not
 * bounded by the buffer size -- confirm this is acceptable for debug use.
 */
873 dirfs_flag2str(dirfs_node_t dnp)
875 const char *txtflg[] = { DIRFS_TXTFLG };
876 static char str[512] = {0};
878 if (dnp->dn_state & DIRFS_PASVFD)
879 ksprintf(str, "%s ", txtflg[0]);
885 debug(int level, const char *fmt, ...)
889 if (debuglvl >= level) {