2 * Copyright (c) 2013 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
6 * by Matthew Dillon <dillon@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 #include <sys/mount.h>
42 #include <sys/queue.h>
43 #include <sys/spinlock2.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/vfscache.h>
48 #include <sys/vnode.h>
53 * Allocate and set up everything needed for the dirfs node to hold the filename.
54 * Note: dn_name is NUL terminated.
/*
 * Replace the node's name with a private copy of the first len bytes of
 * name and record len in dn_namelen.
 */
57 dirfs_node_setname(dirfs_node_t dnp, const char *name, int len)
62 kfree(dnp->dn_name, M_DIRFS_MISC);
/* len + 1: one extra byte for the terminator stored at dn_name[len]. */
63 dnp->dn_name = kmalloc(len + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
64 bcopy(name, dnp->dn_name, len);
65 dnp->dn_name[len] = 0;
66 dnp->dn_namelen = len;
70 * Allocate enough space to hold a dirfs node structure.
71 * Note: Node name and length aren't handled here.
/*
 * Allocate a zero-filled dirfs node, initialize its lock with
 * LK_CANRECURSE and mark it as having no host descriptor (DIRFS_NOFD).
 */
74 dirfs_node_alloc(struct mount *mp)
80 dnp = kmalloc(sizeof(*dnp), M_DIRFS_NODE, M_WAITOK | M_ZERO);
81 lockinit(&dnp->dn_lock, "dfsnode", 0, LK_CANRECURSE);
83 dnp->dn_fd = DIRFS_NOFD;
89 * Drops a reference to the node. The node is freed when the last reference goes away.
/*
 * Calls dirfs_node_unref() on dnp and, when that returns non-zero, hands
 * the node to dirfs_node_free().
 * NOTE(review): presumably non-zero means the count reached zero, which is
 * what dirfs_node_free() asserts -- confirm against dirfs_node_unref().
 */
92 dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp)
96 if (dirfs_node_unref(dnp))
97 dirfs_node_free(dmp, dnp);
101 * Removes the association with its parent. Before freeing up its resources
102 * the node will be removed from the per-mount passive fd cache and its fd
103 * will be closed, either normally or forced.
/*
 * Tear down a node whose reference count is zero (asserted below): release
 * the reference held on the parent, take the node out of the passive fd
 * cache, free the name, make sure no descriptor stays recorded, then
 * destroy the lock and free the node itself.
 */
106 dirfs_node_free(dirfs_mount_t dmp, dirfs_node_t dnp)
112 KKASSERT(dnp != NULL);
115 KKASSERT(dirfs_node_refcnt(dnp) == 0);
117 vp = NODE_TO_VP(dnp);
119 * Remove the inode from the passive fds list
120 * as we are tearing down the node.
121 * Root inode will be removed on VOP_UNMOUNT()
123 if (dnp->dn_parent) { /* NULL when children reaped parents */
/*
 * Releases the "children ref" taken on the parent in dirfs_alloc_file().
 * Because dirfs_node_drop() may call dirfs_node_free(), this can free the
 * parent as well.
 */
124 dirfs_node_drop(dmp, dnp->dn_parent);
125 dnp->dn_parent = NULL;
127 dirfs_node_setpassive(dmp, dnp, 0);
129 kfree(dnp->dn_name, M_DIRFS_MISC);
134 * The file descriptor should have been closed already by the
135 * previous call to dirfs_set-passive. If not, force a sync and
138 if (dnp->dn_fd != DIRFS_NOFD) {
140 VOP_FSYNC(vp, MNT_WAIT, 0);
142 dnp->dn_fd = DIRFS_NOFD;
145 lockuninit(&dnp->dn_lock);
146 kfree(dnp, M_DIRFS_NODE);
153 * Do all the operations needed to get a resulting inode <--> host file
154 * association. This may or may not include opening the file, which should be
155 * only needed when creating it.
157 * In the case vap is not NULL and openflags are specified, open the file.
/*
 * Allocate a node for the name cache entry ncp, link it to its parent
 * pdnp (taking a reference on the parent), optionally create/open the host
 * file with openat(), load the host attributes with dirfs_node_stat() and
 * attach a vnode with dirfs_alloc_vp().
 */
160 dirfs_alloc_file(dirfs_mount_t dmp, dirfs_node_t *dnpp, dirfs_node_t pdnp,
161 struct namecache *ncp, struct vnode **vpp, struct vattr *vap,
176 mp = DIRFS_TO_VFS(dmp);
182 dnp = dirfs_node_alloc(mp);
183 KKASSERT(dnp != NULL);
185 dirfs_node_lock(dnp);
186 dirfs_node_setname(dnp, ncp->nc_name, ncp->nc_nlen);
187 dnp->dn_parent = pdnp;
188 dirfs_node_ref(pdnp); /* Children ref */
189 dirfs_node_unlock(dnp);
/*
 * NOTE(review): dirfs_findfd() has a "failed too long" path; pathnp is
 * dereferenced below -- confirm it can never be NULL here.
 */
191 pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
193 if (openflags && vap != NULL) {
194 dnp->dn_fd = openat(pathnp->dn_fd, tmp,
195 openflags, vap->va_mode);
/*
 * NOTE(review): confirm that this failure path also releases the freshly
 * allocated dnp and the parent reference taken above.
 */
196 if (dnp->dn_fd == -1) {
197 dirfs_dropfd(dmp, pathnp, pathfree);
202 error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
203 if (error) { /* XXX Handle errors */
206 dirfs_free_vp(dmp, dnp);
207 dirfs_node_free(dmp, dnp);
208 dirfs_dropfd(dmp, pathnp, pathfree);
212 dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
216 dbg(9, "tmp=%s dnp=%p allocated\n", tmp, dnp);
217 dirfs_dropfd(dmp, pathnp, pathfree);
219 /* We want VOP_INACTIVE() to be called on last ref */
220 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
226 * Requires an already allocated dirfs_node_t that has already been lstat(2)'ed
227 * for the type comparison
/*
 * Obtain a vnode for dnp. A vnode already attached to the node is reused
 * when vget() succeeds; otherwise a new vnode is requested and, if the node
 * still has none, initialized from the node's type. Races against reclaim
 * and against concurrent callers are resolved by retrying.
 */
230 dirfs_alloc_vp(struct mount *mp, struct vnode **vpp, int lkflags,
234 dirfs_mount_t dmp = VFS_TO_DIRFS(mp);
239 * Handle vnode reclaim/alloc races
244 if (vget(vp, LK_EXCLUSIVE) == 0)
246 /* vget raced a reclaim, retry */
248 getnewvnode(VT_UNUSED10, mp, &vp, 0, lkflags);
249 if (dnp->dn_vnode == NULL) {
252 vp->v_type = dnp->dn_type;
253 if (dmp->dm_root == dnp)
254 vsetflags(vp, VROOT);
255 dirfs_node_ref(dnp); /* ref for dnp<->vp */
257 /* Type-specific initialization. */
258 switch (dnp->dn_type) {
264 vinitvmio(vp, dnp->dn_size, BMASK, -1);
/* NOTE(review): commented-out code; restore with a justification or remove. */
269 // vp->v_ops = &mp->mnt_vn_fifo_ops;
274 panic("dirfs_alloc_vp: dnp=%p vp=%p "
276 dnp, vp, dnp->dn_type);
284 /* multiple dirfs_alloc_vp calls raced, retry */
287 KKASSERT(vp != NULL);
289 dbg(9, "dnp=%p vp=%p type=%d\n", dnp, vp, vp->v_type);
293 * Do not call locked!
/*
 * Break the node/vnode association: dn_vnode is cleared and one node
 * reference is dropped (presumably the "ref for dnp<->vp" taken in
 * dirfs_alloc_vp() -- confirm).
 */
296 dirfs_free_vp(dirfs_mount_t dmp, dirfs_node_t dnp)
298 struct vnode *vp = NODE_TO_VP(dnp);
302 dnp->dn_vnode = NULL;
304 dirfs_node_drop(dmp, dnp);
/*
 * Classify a host file from st->st_mode through a chain of S_IS*() tests.
 * The result is what dirfs_node_stat() stores in dn_type.
 */
308 dirfs_nodetype(struct stat *st)
311 mode_t mode = st->st_mode;
315 else if (S_ISBLK(mode))
317 else if (S_ISCHR(mode))
319 else if (S_ISFIFO(mode))
321 else if (S_ISSOCK(mode))
323 else if (S_ISLNK(mode))
325 else if (S_ISREG(mode))
/*
 * Refresh the cached attributes of dnp from the host file.
 *
 * fd   - directory descriptor that path is relative to, or DIRFS_NOFD to
 *        stat path directly with lstat().
 * path - host path of the file; symbolic links are not followed in either
 *        case (lstat / AT_SYMLINK_NOFOLLOW).
 * dnp  - node whose dn_* attribute fields are overwritten.
 */
334 dirfs_node_stat(int fd, const char *path, dirfs_node_t dnp)
340 if (fd == DIRFS_NOFD)
341 error = lstat(path, &st);
343 error = fstatat(fd, path, &st, AT_SYMLINK_NOFOLLOW);
348 /* Populate our dirfs node struct with stat data */
349 dnp->dn_uid = st.st_uid;
350 dnp->dn_gid = st.st_gid;
351 dnp->dn_mode = st.st_mode;
352 dnp->dn_flags = st.st_flags;
353 dnp->dn_links = st.st_nlink;
/*
 * NOTE(review): each *nsec field below is computed as the seconds value
 * multiplied by 1000000000, not read from the nanosecond part of the
 * timestamp. This looks like a bug: the sub-second part is lost and the
 * product can overflow. The intended source is probably the tv_nsec member
 * of st_atimespec / st_mtimespec / st_ctimespec -- confirm and fix.
 */
354 dnp->dn_atime = st.st_atime;
355 dnp->dn_atimensec = (st.st_atime * 1000000000L);
356 dnp->dn_mtime = st.st_mtime;
357 dnp->dn_mtimensec = (st.st_mtime * 1000000000L);
358 dnp->dn_ctime = st.st_ctime;
359 dnp->dn_ctimensec = (st.st_ctime * 1000000000L);
360 dnp->dn_gen = st.st_gen;
361 dnp->dn_ino = st.st_ino;
362 dnp->dn_st_dev = st.st_dev;
363 dnp->dn_size = st.st_size;
364 dnp->dn_type = dirfs_nodetype(&st);
/*
 * Convenience wrapper: same as dirfs_node_absolute_path_plus() with no
 * trailing element (last == NULL).
 */
370 dirfs_node_absolute_path(dirfs_mount_t dmp, dirfs_node_t cur, char **pathfreep)
372 return(dirfs_node_absolute_path_plus(dmp, cur, NULL, pathfreep));
/*
 * Build the host path of cur, optionally followed by the element last.
 *
 * The path is assembled right to left at the end of a MAXPATHLEN + 1 byte
 * buffer: first the trailing element, then each node name walking up
 * through dn_parent until the root node, and finally the mount's dm_path
 * as the prefix. count tracks how many bytes are in use; components are
 * only copied while count <= MAXPATHLEN.
 *
 * On success the returned pointer refers to the interior of the buffer
 * (&buf[MAXPATHLEN - count]), so it must not be passed to kfree() itself.
 * NOTE(review): presumably *pathfreep receives the buffer base for that
 * purpose -- confirm.
 */
376 dirfs_node_absolute_path_plus(dirfs_mount_t dmp, dirfs_node_t cur,
377 char *last, char **pathfreep)
386 KKASSERT(dmp->dm_root); /* Sanity check */
/* Allocated without M_ZERO, unlike the buffer in dirfs_findfd(). */
390 buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK);
393 * Passed-in trailing element.
400 if (count <= MAXPATHLEN)
401 bcopy(last, &buf[MAXPATHLEN - count], len);
403 if (count <= MAXPATHLEN)
404 buf[MAXPATHLEN - count] = '/';
408 * Iterate through the parents until we hit the root.
411 while (dirfs_node_isroot(dnp1) == 0) {
412 count += dnp1->dn_namelen;
413 if (count <= MAXPATHLEN) {
414 bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
418 if (count <= MAXPATHLEN)
419 buf[MAXPATHLEN - count] = '/';
420 dnp1 = dnp1->dn_parent;
426 * Prefix with the root mount path. If the element was unlinked
427 * dnp1 will be NULL and there is no path.
429 len = strlen(dmp->dm_path);
431 if (dnp1 && count <= MAXPATHLEN) {
432 bcopy(dmp->dm_path, &buf[MAXPATHLEN - count], len);
434 dbg(9, "absolute_path %s\n", &buf[MAXPATHLEN - count]);
435 return (&buf[MAXPATHLEN - count]);
/* Failure: the buffer is released here. */
437 kfree(buf, M_DIRFS_MISC);
444 * Return a dirfs_node with a valid descriptor plus an allocated
445 * relative path which can be used in openat(), fstatat(), etc calls
446 * to locate the requested inode.
/*
 * Walk up from cur through dn_parent until a node other than cur with an
 * open descriptor is found, collecting the names passed on the way into a
 * relative path built right to left at the end of a zeroed
 * MAXPATHLEN + 1 byte buffer.
 *
 * On success *pathto points just past the leading '/' of that relative
 * path and a reference is taken on the node found; dirfs_dropfd() is the
 * counterpart used by callers in this file to drop that reference and free
 * the buffer. If the path does not fit in MAXPATHLEN the buffer is freed.
 */
449 dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur,
450 char **pathto, char **pathfreep)
464 buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
468 while (dnp1 == cur || dnp1->dn_fd == DIRFS_NOFD) {
469 count += dnp1->dn_namelen;
470 if (count <= MAXPATHLEN) {
471 bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
475 if (count <= MAXPATHLEN)
476 buf[MAXPATHLEN - count] = '/';
477 dnp1 = dnp1->dn_parent;
478 KKASSERT(dnp1 != NULL);
/*
 * NOTE(review): dnp1 was asserted non-NULL inside the loop, so the dnp1
 * test below looks redundant.
 */
481 if (dnp1 && count <= MAXPATHLEN) {
483 *pathto = &buf[MAXPATHLEN - count + 1]; /* skip '/' prefix */
484 dirfs_node_ref(dnp1);
/*
 * NOTE(review): the format uses %d for dnp1->dn_name, which is a character
 * pointer (it is indexed and bcopy'd into elsewhere in this file). This is
 * a format/argument mismatch; %s is probably intended -- confirm and fix.
 */
485 dbg(9, "fd=%d dnp1=%p dnp1->dn_name=%d &buf[off]=%s\n",
486 dnp1->dn_fd, dnp1, dnp1->dn_name, *pathto);
488 dbg(9, "failed too long\n");
489 kfree(buf, M_DIRFS_MISC);
/*
 * Release what a path lookup handed out: free the path buffer pathfree and
 * drop the reference on dnp1.
 * Several callers in this file pass NULL for dnp1 (after
 * dirfs_node_absolute_path()), so the drop has to tolerate that.
 */
498 dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree)
501 kfree(pathfree, M_DIRFS_MISC);
503 dirfs_node_drop(dmp, dnp1);
/*
 * Translate the node's host mode bits into DIRFS_NODE_RD / DIRFS_NODE_WR /
 * DIRFS_NODE_EXE flags.
 *
 * Exactly one permission class is consulted: the owner bits when the
 * mount's dm_uid equals the node's uid, else the group bits when dm_gid
 * equals the node's gid, else the "other" bits.
 *
 * The visible code only ORs bits into *flags; dirfs_open_helper() zeroes
 * its perms variable before calling.
 */
507 dirfs_node_getperms(dirfs_node_t dnp, int *flags)
510 struct vnode *vp = dnp->dn_vnode;
515 * There must be an active vnode anyways since that
516 * would indicate the dirfs node has valid data for
517 * for dnp->dn_mode (via lstat syscall).
520 dmp = VFS_TO_DIRFS(vp->v_mount);
522 isowner = (dmp->dm_uid == dnp->dn_uid);
523 isgroup = (dmp->dm_gid == dnp->dn_gid);
526 if (dnp->dn_mode & S_IRUSR)
527 *flags |= DIRFS_NODE_RD;
528 if (dnp->dn_mode & S_IWUSR)
529 *flags |= DIRFS_NODE_WR;
530 if (dnp->dn_mode & S_IXUSR)
531 *flags |= DIRFS_NODE_EXE;
532 } else if (isgroup) {
533 if (dnp->dn_mode & S_IRGRP)
534 *flags |= DIRFS_NODE_RD;
535 if (dnp->dn_mode & S_IWGRP)
536 *flags |= DIRFS_NODE_WR;
537 if (dnp->dn_mode & S_IXGRP)
538 *flags |= DIRFS_NODE_EXE;
540 if (dnp->dn_mode & S_IROTH)
541 *flags |= DIRFS_NODE_RD;
542 if (dnp->dn_mode & S_IWOTH)
543 *flags |= DIRFS_NODE_WR;
544 if (dnp->dn_mode & S_IXOTH)
545 *flags |= DIRFS_NODE_EXE;
552 * This requires an allocated node and vnode, otherwise it'll panic
/*
 * Open the host file behind dnp with openat() and store the descriptor in
 * dnp->dn_fd. The node must already have a vnode (asserted).
 *
 * Open flags are derived from the node type and from the permissions
 * reported by dirfs_node_getperms(). When relpath is given, parentfd must
 * be valid (asserted); when neither is given, dirfs_findfd() supplies the
 * base descriptor, and dirfs_dropfd() is called at the end.
 */
555 dirfs_open_helper(dirfs_mount_t dmp, dirfs_node_t dnp, int parentfd,
567 flags = error = perms = 0;
571 KKASSERT(dnp->dn_vnode);
574 * XXX Besides VDIR and VREG there are other file
576 * Also, O_RDWR alone might not be the best mode to open
577 * a file with, need to investigate which suits better.
579 dirfs_node_getperms(dnp, &perms);
/*
 * NOTE(review): dn_type holds an enumerated vnode type (it is copied to
 * vp->v_type and switched on in dirfs_alloc_vp()), not a bit mask. Testing
 * it with '&' can match types other than VDIR depending on the enum
 * values; '== VDIR' is probably intended -- confirm and fix.
 */
581 if (dnp->dn_type & VDIR) {
582 flags |= O_DIRECTORY;
584 if (perms & DIRFS_NODE_WR)
589 if (relpath != NULL) {
592 KKASSERT(parentfd != DIRFS_NOFD);
593 } else if (parentfd == DIRFS_NOFD) {
/*
 * NOTE(review): dirfs_findfd() has a failure path; confirm pathnp cannot
 * be NULL before it is dereferenced on the next line.
 */
594 pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
595 parentfd = pathnp->dn_fd;
600 dnp->dn_fd = openat(parentfd, tmp, flags);
601 if (dnp->dn_fd == -1)
604 dbg(9, "dnp=%p tmp2=%s parentfd=%d flags=%d error=%d "
605 "flags=%08x w=%d x=%d\n", dnp, tmp, parentfd, flags, error,
609 dirfs_dropfd(dmp, pathnp, pathfree);
/*
 * Close the node's host descriptor, if it has one, and mark the node as
 * having none (DIRFS_NOFD). The result of close() is kept in error.
 */
615 dirfs_close_helper(dirfs_node_t dnp)
622 if (dnp->dn_fd != DIRFS_NOFD) {
/* Note: this message is logged before close() is actually attempted. */
623 dbg(9, "closed fd on dnp=%p\n", dnp);
625 /* buffer cache buffers may still be present */
626 error = close(dnp->dn_fd); /* XXX EINTR should be checked */
627 dnp->dn_fd = DIRFS_NOFD;
/*
 * Return the node's current reference count (a plain read of dn_refcnt).
 */
635 dirfs_node_refcnt(dirfs_node_t dnp)
637 return dnp->dn_refcnt;
/*
 * Update the host file's timestamps with lutimes(path, NULL) and reload
 * the node's attributes with dirfs_node_stat(). The vnode must be locked
 * on entry and is still locked on exit (both asserted).
 *
 * Nodes flagged IMMUTABLE or APPEND are treated specially before the host
 * call is reached.
 */
641 dirfs_node_chtimes(dirfs_node_t dnp)
649 vp = NODE_TO_VP(dnp);
650 dmp = VFS_TO_DIRFS(vp->v_mount);
652 KKASSERT(vn_islocked(vp));
654 if (dnp->dn_flags & (IMMUTABLE | APPEND))
657 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
659 if((lutimes(tmp, NULL)) == -1)
662 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
/* No node reference was taken for the absolute path, hence NULL. */
663 dirfs_dropfd(dmp, NULL, pathfree);
665 KKASSERT(vn_islocked(vp));
/*
 * Change the file flags of the host file behind dnp.
 *
 * The new flag word is computed from the node's current flags and vaflags
 * by vop_helper_setattr_flags() (which also receives the owner uid and the
 * caller's credentials), applied with lchflags() on the absolute host
 * path, and the node's attributes are then reloaded with
 * dirfs_node_stat(). The vnode must be locked on entry and is still locked
 * on exit (both asserted).
 */
672 dirfs_node_chflags(dirfs_node_t dnp, int vaflags, struct ucred *cred)
681 vp = NODE_TO_VP(dnp);
682 dmp = VFS_TO_DIRFS(vp->v_mount);
684 KKASSERT(vn_islocked(vp));
686 flags = dnp->dn_flags;
688 error = vop_helper_setattr_flags(&flags, vaflags, dnp->dn_uid, cred);
690 * When running vkernels with non-root it is not possible to set
691 * certain flags on host files, such as SF* flags. chflags(2) call
692 * will spit an error in that case.
695 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
697 if((lchflags(tmp, flags)) == -1)
699 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
700 dirfs_dropfd(dmp, NULL, pathfree);
703 KKASSERT(vn_islocked(vp));
/*
 * Change the mode of the host file behind dnp with lchmod() on its
 * absolute path, then reload the node's attributes with dirfs_node_stat()
 * and free the path buffer.
 */
709 dirfs_node_chmod(dirfs_mount_t dmp, dirfs_node_t dnp, mode_t mode)
715 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
717 if (lchmod(tmp, mode) < 0)
719 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
720 dirfs_dropfd(dmp, NULL, pathfree);
/*
 * Change owner and group of the host file behind dnp with lchown() on its
 * absolute path. An additional step is taken when mode differs from the
 * node's current mode. Afterwards the node's attributes are reloaded with
 * dirfs_node_stat() and the path buffer is freed.
 *
 * NOTE(review): gid is declared as uid_t; gid_t would be the conventional
 * type -- confirm whether this is intentional.
 */
726 dirfs_node_chown(dirfs_mount_t dmp, dirfs_node_t dnp,
727 uid_t uid, uid_t gid, mode_t mode)
733 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
735 if (lchown(tmp, uid, gid) < 0)
737 if (mode != dnp->dn_mode)
739 dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
740 dirfs_dropfd(dmp, NULL, pathfree);
/*
 * Resize the file behind dnp to nsize bytes.
 *
 * The buffer cache is adjusted first (nvtruncbuf() or nvextendbuf()); only
 * if that succeeds is the host file resized with truncate() on its
 * absolute path. dn_size is then updated directly instead of re-reading
 * the attributes from the host. The vnode must be locked on entry and is
 * still locked on exit (both asserted).
 */
747 dirfs_node_chsize(dirfs_node_t dnp, off_t nsize)
759 vp = NODE_TO_VP(dnp);
760 dmp = VFS_TO_DIRFS(vp->v_mount);
762 osize = dnp->dn_size;
764 KKASSERT(vn_islocked(vp));
766 switch (vp->v_type) {
776 tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
778 error = nvtruncbuf(vp, nsize, biosize, -1, 0);
780 error = nvextendbuf(vp, osize, nsize,
784 if (error == 0 && truncate(tmp, nsize) < 0)
787 dnp->dn_size = nsize;
/*
 * NOTE(review): the second %jx argument is passed without the (intmax_t)
 * cast used for the first. Once dn_size has been set to nsize both values
 * are equal, so osize may have been intended here -- confirm.
 */
788 dbg(9, "TRUNCATE %016jx %016jx\n", (intmax_t)nsize, dnp->dn_size);
789 /*dirfs_node_stat(DIRFS_NOFD, tmp, dnp); don't need to do this*/
791 dirfs_dropfd(dmp, NULL, pathfree);
794 KKASSERT(vn_islocked(vp));
/*
 * Add dnp to (state != 0) or remove it from (state == 0) the per-mount
 * passive descriptor list dm_fdlist.
 *
 * Adding: only done for a node that has an open descriptor and is not
 * already flagged DIRFS_PASVFD. The node is flagged and appended to the
 * tail of the list. While the mount is over dirfs_fd_limit, entries are
 * evicted from the head by calling this function again with state 0; note
 * that the dnp parameter is reused as the loop variable there.
 *
 * Removing: the flag is cleared, the node is unlinked from the list, its
 * descriptor is closed when the conditions documented below hold, and one
 * node reference is dropped.
 */
800 dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state)
804 dbg(5, "dnp=%p state=%d dnp->dn_fd=%d\n", dnp, state, dnp->dn_fd);
806 if (state && (dnp->dn_state & DIRFS_PASVFD) == 0 &&
807 dnp->dn_fd != DIRFS_NOFD) {
809 dirfs_node_setflags(dnp, DIRFS_PASVFD);
810 TAILQ_INSERT_TAIL(&dmp->dm_fdlist, dnp, dn_fdentry);
815 * If we are over our limit remove nodes from the
818 while (dmp->dm_fd_used > dirfs_fd_limit) {
819 dnp = TAILQ_FIRST(&dmp->dm_fdlist);
820 dirfs_node_setpassive(dmp, dnp, 0);
823 if (state == 0 && (dnp->dn_state & DIRFS_PASVFD)) {
824 dirfs_node_clrflags(dnp, DIRFS_PASVFD);
825 TAILQ_REMOVE(&dmp->dm_fdlist, dnp, dn_fdentry);
/* Logs dirfs_fd_used, not the per-mount dm_fd_used tested by the loop above. */
828 dbg(5, "dnp=%p removed from fdlist. %d used refs=%d\n",
829 dnp, dirfs_fd_used, dirfs_node_refcnt(dnp));
832 * Attempt to close the descriptor. We can only do this
833 * if the related vnode is inactive and has exactly two
834 * refs (representing the vp<->dnp and PASVFD). Otherwise
835 * someone might have ref'd the node in order to use the
838 * Also, if the vnode is in any way dirty we leave the fd
839 * open for the buffer cache code. The syncer will eventually
840 * come along and fsync the vnode, and the next inactive
841 * transition will deal with the descriptor.
843 * The descriptor for the root node is NEVER closed by
/* Case 1: a vnode is attached, so two references are expected. */
847 if (dirfs_node_refcnt(dnp) == 2 && vp &&
848 dnp->dn_fd != DIRFS_NOFD &&
849 !dirfs_node_isroot(dnp) &&
850 (vp->v_flag & (VINACTIVE|VOBJDIRTY)) == VINACTIVE &&
851 RB_EMPTY(&vp->v_rbdirty_tree)) {
852 dbg(9, "passive cache: closing %d\n", dnp->dn_fd);
854 dnp->dn_fd = DIRFS_NOFD;
/* Case 2: no vnode attached, so only one reference is expected. */
856 if (dirfs_node_refcnt(dnp) == 1 && dnp->dn_vnode == NULL &&
857 dnp->dn_fd != DIRFS_NOFD &&
858 dnp != dmp->dm_root) {
859 dbg(9, "passive cache: closing %d\n", dnp->dn_fd);
861 dnp->dn_fd = DIRFS_NOFD;
864 dirfs_node_drop(dmp, dnp);
/*
 * Format the node's state flags as text for debugging output.
 *
 * The text is written into a function-local static buffer, so the buffer
 * is shared by all calls: each call overwrites the previous contents, and
 * concurrent callers are not protected from each other here.
 */
869 dirfs_flag2str(dirfs_node_t dnp)
871 const char *txtflg[] = { DIRFS_TXTFLG };
872 static char str[512] = {0};
874 if (dnp->dn_state & DIRFS_PASVFD)
875 ksprintf(str, "%s ", txtflg[0]);
881 debug(int level, const char *fmt, ...)
885 if (debuglvl >= level) {