Bring in DIRFS: A filesystem for VKERNELS
authorAntonio Huete Jimenez <tuxillo@quantumachine.net>
Wed, 22 Aug 2012 12:53:18 +0000 (14:53 +0200)
committerAntonio Huete Jimenez <tuxillo@quantumachine.net>
Thu, 5 Sep 2013 00:14:38 +0000 (02:14 +0200)
* What is DIRFS?
  dirfs is a pseudo-filesystem specific for vkernel(7) which allows mounting
  host's directories into the vkernel. It runs directly in the vkernel's VFS
  code, as any other regular filesystem, but it does syscalls (vkernels are
  userland programs) to retrieve or post the information needed on every
  operation requested.

  Needless to say that the operations that you can perform in the host
  directories/files depend on the permissions of the user that runs the vkernel.
  For example, you will not be able to 'chflags schg' if you run the vkernel
  with a regular user and not with root.

* How does it work?
  It basically works like any other filesystem. It has its own mount_dirfs
  command that will be called by the system's mount(8) command when needed.

vkernel64 # mount -t dirfs /usr/src2 /mnt
vkernel64 # df -h /mnt
Filesystem        Size   Used  Avail Capacity  Mounted on
dirfs@/usr/src2    47G    36G    12G    75%    /mnt

  Umounting is a normal operation too:

  vkernel64 # mount | fgrep dirfs
dirfs@/usr/src2 on /mnt (dirfs)
vkernel64 # umount /mnt

* What's the current status?
  Currently it is in an *experimental* state, with (probably) many bugs and some
  parts missing.

TODO
  - Make dirfs mpsafe.
  - Fix problems with multiple mount points.
  - Implement VOP_NLINK so that hardlinks are possible.
  - Add missing kqueue(2) support.
  - dirfs root so that a vkernel can be booted from it.
  - Locking mechanisms for opened fds between host <-> vkernel.
  - Make sure dirfs is properly restored after vkernel checkpointing
    (upcoming GSoC project).
  - Bug hunting & bug fixing.
  - Any ideas?

14 files changed:
sbin/Makefile
sbin/mount_dirfs/Makefile [new file with mode: 0644]
sbin/mount_dirfs/mount_dirfs.8 [new file with mode: 0644]
sbin/mount_dirfs/mount_dirfs.c [new file with mode: 0644]
share/man/man5/Makefile
share/man/man5/dirfs.5 [new file with mode: 0644]
sys/platform/vkernel/conf/files
sys/platform/vkernel/conf/options
sys/platform/vkernel64/conf/files
sys/platform/vkernel64/conf/options
sys/vfs/dirfs/dirfs.h [new file with mode: 0644]
sys/vfs/dirfs/dirfs_subr.c [new file with mode: 0644]
sys/vfs/dirfs/dirfs_vfsops.c [new file with mode: 0644]
sys/vfs/dirfs/dirfs_vnops.c [new file with mode: 0644]

index c8663fc..0c52329 100644 (file)
@@ -50,6 +50,7 @@ SUBDIR=       adjkerntz \
        mount_ufs \
        mount_cd9660 \
        mount_devfs \
+       mount_dirfs \
        mount_ext2fs \
        mount_hammer \
        mount_hpfs \
diff --git a/sbin/mount_dirfs/Makefile b/sbin/mount_dirfs/Makefile
new file mode 100644 (file)
index 0000000..2e289de
--- /dev/null
@@ -0,0 +1,10 @@
+PROG=  mount_dirfs
+SRCS=  mount_dirfs.c
+MAN=    mount_dirfs.8
+
+LDADD=  -lutil
+DPADD=  ${LIBUTIL}
+
+#CFLAGS+= -I${.CURDIR}/../../sys
+
+.include <bsd.prog.mk>
diff --git a/sbin/mount_dirfs/mount_dirfs.8 b/sbin/mount_dirfs/mount_dirfs.8
new file mode 100644 (file)
index 0000000..5e50390
--- /dev/null
@@ -0,0 +1,66 @@
+.\"
+.\" Copyright (c) 2013 Antonio Huete Jimenez <tuxillo@quantumachine.net>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd September 5, 2013
+.Dt MOUNT_DIRFS 8
+.Os
+.Sh NAME
+.Nm mount_dirfs
+.Nd mount a host directory inside a vkernel
+.Sh SYNOPSIS
+.Nm
+.Op Fl o Ar options
+.Ar hostdir
+.Ar mount_point
+.Sh DESCRIPTION
+The
+.Nm
+utility allows
+.Xr vkernel 7
+to access host directories with minimal configuration.
+.Pp
+The following options are supported:
+.Bl -tag -width XoXoptions
+.It Fl o Ar options
+Options are specified with a
+.Fl o
+flag followed by a comma-separated string of options.
+See the
+.Xr mount 8
+and
+.Xr dirfs 5
+manual pages for possible options and their meanings.
+.El
+.Sh EXAMPLES
+The command below mounts host directory
+.Pa /usr/src
+on vkernel's directory
+.Pa /mnt :
+.Pp
+.Ic "mount -t dirfs /usr/src /mnt"
+.Sh SEE ALSO
+.Xr fstab 5 ,
+.Xr dirfs 5 ,
+.Xr mount 8
diff --git a/sbin/mount_dirfs/mount_dirfs.c b/sbin/mount_dirfs/mount_dirfs.c
new file mode 100644 (file)
index 0000000..65dd41e
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/diskslice.h>
+#include <sys/diskmbr.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/syslimits.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <uuid.h>
+#include <err.h>
+#include <assert.h>
+#include <ctype.h>
+#include <mntopts.h>
+
+#define MOPT_UPDATE         { "update",     0, MNT_UPDATE, 0 }
+#define PLATFORM_LEN   16
+
+static struct mntopt mopts[] = { MOPT_STDOPTS, MOPT_UPDATE, MOPT_NULL };
+
+static void usage(void);
+
+/*
+ * mount_dirfs entry point.
+ *
+ * Parses the -o/-u options, verifies through the hw.platform sysctl that
+ * we are running inside a vkernel, resolves (loading it if necessary) the
+ * "dirfs" VFS and finally calls mount(2).  In update mode (-u) only the
+ * mount point is expected; otherwise a host directory followed by a mount
+ * point is required.
+ *
+ * Exits with status 0 on success.  Every failure path goes through
+ * err(3), errx(3) or usage() and exits with status 1.
+ */
+int
+main(int ac, char **av)
+{
+       struct vfsconf vfc;
+       int mount_flags = 0;
+       int init_flags = 0;
+       int error;
+       int ch;
+       char *mountpt, *hostdir;
+       size_t vsize;
+       char platform[PLATFORM_LEN] = {0};
+
+       while ((ch = getopt(ac, av, "o:u")) != -1) {
+               switch(ch) {
+               case 'u':
+                       init_flags |= MNT_UPDATE;
+                       break;
+               case 'o':
+                       getmntopts(optarg, mopts, &mount_flags, NULL);
+                       break;
+               default:
+                       usage();
+                       /* not reached */
+               }
+       }
+       ac -= optind;
+       av += optind;
+       mount_flags |= init_flags;
+
+       /*
+        * Check we're in a vkernel or abort.
+        */
+       vsize = PLATFORM_LEN;
+       error = sysctlbyname("hw.platform", platform, &vsize, NULL, 0);
+       if (error)
+               errx(1, "Failed to get hw.platform sysctl");
+
+       if (strnstr(platform, "vkernel", PLATFORM_LEN) == NULL)
+               errx(1, "dirfs is only available for vkernels.");
+
+       /*
+        * Only the mount point need be specified in update mode,
+        * otherwise both the host directory and the mount point are
+        * required.
+        */
+       if (init_flags & MNT_UPDATE) {
+               if (ac != 1) {
+                       usage();
+                       /* not reached */
+               }
+       } else if (ac < 2) {
+               usage();
+               /* not reached */
+       }
+
+       /*
+        * Load the dirfs module if necessary (this bit stolen from
+        * mount_null).  This must happen before either mount(2) call
+        * below because both of them take the filesystem name from vfc.
+        */
+       error = getvfsbyname("dirfs", &vfc);
+       if (error && vfsisloadable("dirfs")) {
+               if (vfsload("dirfs") != 0)
+                       err(1, "vfsload(dirfs)");
+               endvfsent();
+               error = getvfsbyname("dirfs", &vfc);
+       }
+       if (error)
+               errx(1, "dirfs filesystem is not available");
+
+       if (init_flags & MNT_UPDATE) {
+               mountpt = av[0];
+               if (mount(vfc.vfc_name, mountpt, mount_flags, NULL))
+                       err(1, "mountpoint %s", mountpt);
+               exit(0);
+       }
+
+       hostdir = av[0];
+       mountpt = av[1];
+
+       error = mount(vfc.vfc_name, mountpt, mount_flags, hostdir);
+       if (error)
+               err(1, "failed to mount %s on %s", hostdir, mountpt);
+
+       exit (0);
+}
+
+/*
+ * Print the command synopsis on stderr and terminate with exit status 1.
+ */
+static void
+usage(void)
+{
+       fputs("usage: mount_dirfs [-u] [-o options] hostdir dir\n", stderr);
+       exit(1);
+}
index 9e022ed..cc25f05 100644 (file)
@@ -10,6 +10,7 @@ MAN=  acct.5 \
        devfs.5 \
        devtab.5 \
        dir.5 \
+       dirfs.5 \
        disktab.5 \
        elf.5 \
        ethers.5 \
diff --git a/share/man/man5/dirfs.5 b/share/man/man5/dirfs.5
new file mode 100644 (file)
index 0000000..8f6b626
--- /dev/null
@@ -0,0 +1,103 @@
+.\"
+.\" Copyright (c) 2013 Antonio Huete Jimenez <tuxillo@quantumachine.net>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. The name of the author may not be used to endorse or promote products
+.\"    derived from this software without specific prior written permission
+.\"
+.\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\"
+.Dd September 5, 2013
+.Dt DIRFS 5
+.Os
+.Sh NAME
+.Nm dirfs
+.Nd "pseudo-filesystem for vkernel"
+.Sh SYNOPSIS
+To compile this driver into the vkernel,
+place the following line in your
+vkernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options DIRFS"
+.Ed
+.Pp
+Currently this driver does not provide a loadable module.
+.Pp
+In
+.Xr fstab 5 :
+.Bd -literal -compact
+/usr/src     /mnt dirfs rw 0 0
+.Ed
+.Sh DESCRIPTION
+.Nm
+was born from the idea of providing an easy way for
+.Xr vkernel 7
+to access host's directories without any sort of configuration as it would be
+needed by NFS for example.
+.Pp
+It runs directly in the vkernel's
+.Xr VFS 9
+code, as any other regular filesystem but it uses syscalls to retrieve the
+information needed for every operation requested.
+.Pp
+It should be noted that when the vkernel is run by a regular user, the
+operations
+.Nm
+can perform on the mounted host directory are bound to the permissions of
+the aforementioned user.
+.Pp
+Multiple
+.Nm
+mounts are allowed.
+.Sh EXAMPLES
+To mount a
+.Nm
+file system:
+.Pp
+.Dl "mount -t dirfs /usr/src /mnt"
+.Sh SEE ALSO
+.Xr fstab 5 ,
+.Xr mount_dirfs 8
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Dx 3.5 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+vkernel implementation was written from scratch by
+.An Antonio Huete Jimenez Aq Mt tuxillo@quantumachine.net
+.Pp
+Numerous fixes and pointers by
+.An Matthew Dillon Aq Mt dillon@apollo.backplane.com
+.Pp
+This manual page was written by
+.An Antonio Huete Jimenez Aq Mt tuxillo@quantumachine.net
+.Sh BUGS
+Currently there is no locking on file descriptors between the host
+and the vkernel.
+This means that there might be problems with concurrent accesses to the same
+file.
+.Pp
+There is no support for hardlinks in
+.Nm
+yet.
index 28f42b4..4d3db4c 100644 (file)
@@ -54,6 +54,9 @@ kern/subr_diskgpt.c                   standard
 dev/virtual/vkernel/cdrom/vcd.c                optional        vcd
 dev/virtual/vkernel/disk/vdisk.c       optional        vkd
 dev/virtual/vkernel/net/if_vke.c       optional        vke
+vfs/dirfs/dirfs_vnops.c                        optional        dirfs
+vfs/dirfs/dirfs_vfsops.c               optional        dirfs
+vfs/dirfs/dirfs_subr.c         optional        dirfs
 
 # PLATFORM FILES
 #
index e1085b1..77a7d72 100644 (file)
@@ -4,3 +4,9 @@ I586_CPU                opt_global.h
 I686_CPU                opt_global.h
 
 COMPAT_DF12            opt_compatdf12.h
+
+# Static filesystems
+DIRFS           opt_dontuse.h
+
+# KTR options
+KTR_DIRFS       opt_ktr.h
index 65d7593..ba1417a 100644 (file)
@@ -44,6 +44,9 @@ kern/subr_diskgpt.c                   standard
 dev/virtual/vkernel/cdrom/vcd.c                optional        vcd
 dev/virtual/vkernel/disk/vdisk.c       optional        vkd
 dev/virtual/vkernel/net/if_vke.c       optional        vke
+vfs/dirfs/dirfs_vnops.c                        optional        dirfs
+vfs/dirfs/dirfs_vfsops.c               optional        dirfs
+vfs/dirfs/dirfs_subr.c         optional        dirfs
 
 # PLATFORM FILES
 #
index b9335f8..223439f 100644 (file)
@@ -1,3 +1,9 @@
 # The cpu type
 #
 HAMMER_CPU              opt_global.h
+
+# Static filesystems
+DIRFS           opt_dontuse.h
+
+# KTR options
+KTR_DIRFS      opt_ktr.h
\ No newline at end of file
diff --git a/sys/vfs/dirfs/dirfs.h b/sys/vfs/dirfs/dirfs.h
new file mode 100644 (file)
index 0000000..6a41329
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef _SYS_VFS_DIRFS_DIRFS_H_
+#define _SYS_VFS_DIRFS_DIRFS_H_
+
+#include <unistd.h>
+
+#include <sys/lockf.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+
+MALLOC_DECLARE(M_DIRFS);
+MALLOC_DECLARE(M_DIRFS_NODE);
+MALLOC_DECLARE(M_DIRFS_MISC);
+
+#ifndef KTR_DIRFS
+#define KTR_DIRFS KTR_ALL
+#endif
+
+#define DIRFS_NOFD     -1      /* No fd present */
+
+#define DIRFS_ROOT     0x00000001
+#define DIRFS_PASVFD   0x00000002
+
+#define DIRFS_TXTFLG "pasvfd"
+
+/* Used for buffer cache operations */
+#define BSIZE  16384
+#define BMASK  (BSIZE - 1)
+
+/*
+ * XXX This should be temporary. A semi-proper solution would be to expose
+ * below prototypes in the _KERNEL_VIRTUAL case.
+ */
+extern int getdirentries(int, char *, int, long *);
+extern int statfs(const char *, struct statfs *);
+
+/*
+ * Debugging macros. The impact should be determined and in case it has a
+ * considerable performance penalty, it should be enclosed in a DEBUG #ifdef.
+ */
+#define debug_called() do {                                    \
+               dbg(9, "called\n", __func__);                   \
+} while(0)
+
+/*
+ * Forward a message to debug() at level `lvl', prefixing the format string
+ * with the name of the calling function.
+ */
+#define dbg(lvl, fmt, ...) do {                                        \
+               debug(lvl, "%s: " fmt, __func__, ##__VA_ARGS__);        \
+} while(0)
+
+#define debug_node(s) do {                                             \
+               dbg(5, "mode=%u flags=%u dn_name=%s "                   \
+                   "uid=%u gid=%u objtype=%u nlinks=%d "               \
+                   "size=%jd ctime=%ju atime=%ju mtime=%ju\n",         \
+                   s->dn_mode, s->dn_flags, s->dn_name,                \
+                   s->dn_uid, s->dn_gid, s->dn_type,                   \
+                   s->dn_links, s->dn_size,                            \
+                   s->dn_ctime, s->dn_atime,                           \
+                   s->dn_mtime);                                       \
+} while(0)
+
+#define debug_node2(n) do {                                            \
+               dbg(5, "dnp=%p name=%s fd=%d parent=%p vnode=%p "       \
+                   "refcnt=%d state=%s\n",                             \
+                   n, n->dn_name, n->dn_fd, n->dn_parent, n->dn_vnode, \
+                   n->dn_refcnt, dirfs_flag2str(n));                   \
+} while(0)
+
+/*
+ * Locking macros
+ */
+/* True when lockstatus() reports dn_lock as LK_EXCLUSIVE for curthread. */
+#define dirfs_node_islocked(n) (lockstatus(&(n)->dn_lock,curthread) == LK_EXCLUSIVE)
+/* Acquire/release the per-node lockmgr lock (exclusive, with LK_RETRY). */
+#define dirfs_node_lock(n)     lockmgr(&(n)->dn_lock, LK_EXCLUSIVE|LK_RETRY)
+#define dirfs_node_unlock(n)   lockmgr(&(n)->dn_lock, LK_RELEASE)
+/* Acquire/release the per-mount lockmgr lock (exclusive, with LK_RETRY). */
+#define dirfs_mount_lock(m)    lockmgr(&(m)->dm_lock, LK_EXCLUSIVE|LK_RETRY)
+#define dirfs_mount_unlock(m)  lockmgr(&(m)->dm_lock, LK_RELEASE)
+/* Acquire/release the per-mount LWKT token. */
+#define dirfs_mount_gettoken(m)        lwkt_gettoken(&(m)->dm_token)
+#define dirfs_mount_reltoken(m)        lwkt_reltoken(&(m)->dm_token)
+
+/* Non-zero when node `n' carries the DIRFS_ROOT state flag. */
+#define dirfs_node_isroot(n)   ((n)->dn_state & DIRFS_ROOT)
+
+/*
+ * Main in-memory node structure which will represent a host file when active.
+ * Upon VOP_NRESOLVE() an attempt to initialize its generic fields will be made
+ * via a fstatat(2)/lstat(2) call.
+ */
+struct dirfs_node {
+       enum vtype              dn_type;        /* Node type. Same as vnode
+                                                  type for simplicity */
+
+       int                     dn_state;       /* Node state flags
+                                                  (DIRFS_ROOT, DIRFS_PASVFD) */
+
+       TAILQ_ENTRY(dirfs_node) dn_fdentry;     /* Passive fd cache */
+       RB_ENTRY(dirfs_node)    dn_rbentry;     /* Inode no. lookup */
+
+       int                     dn_refcnt;      /* Refs from children */
+       int                     dn_fd;          /* File des. for open(2);
+                                                  presumably DIRFS_NOFD when
+                                                  not open - confirm */
+
+       struct dirfs_node *     dn_parent;      /* Pointer to parent node */
+
+       struct vnode *          dn_vnode;       /* Reference to its vnode on
+                                                  the vkernel scope */
+       char *                  dn_name;        /* Node name; see
+                                                  dirfs_node_setname() */
+       int                     dn_namelen;     /* Presumably the length of
+                                                  dn_name - confirm */
+
+        struct lockf            dn_advlock;     /* NOTE(review): presumably
+                                                  advisory lock state -
+                                                  confirm */
+       struct lock             dn_lock;        /* Taken by dirfs_node_lock() */
+
+       uint32_t                dn_st_dev;      /* Device number */
+
+       /*
+        * Generic attributes.
+        * NOTE(review): the time fields are 32 bits wide; presumably they
+        * are filled from the host's stat(2) data - confirm that the
+        * truncation is acceptable.
+        */
+       ino_t                   dn_ino;
+       long                    dn_blocksize;
+       uid_t                   dn_uid;
+       gid_t                   dn_gid;
+       mode_t                  dn_mode;
+       int                     dn_flags;
+       nlink_t                 dn_links;
+       int32_t                 dn_atime;
+       int32_t                 dn_atimensec;
+       int32_t                 dn_mtime;
+       int32_t                 dn_mtimensec;
+       int32_t                 dn_ctime;
+       int32_t                 dn_ctimensec;
+       unsigned long           dn_gen;
+       off_t                   dn_size;
+};
+/* Pointer typedef used throughout dirfs to refer to a node. */
+typedef struct dirfs_node *dirfs_node_t;
+
+/*
+ * In-memory dirfs mount structure. It corresponds to a mounted
+ * dirfs filesystem.
+ */
+struct dirfs_mount {
+       /*
+        * NOTE(review): RB_HEAD() takes (name, type); dn_rbentry is the
+        * name of the RB_ENTRY field in struct dirfs_node, not a struct
+        * tag.  Presumably the element type was meant to be dirfs_node -
+        * confirm against the users of dm_inotree.
+        */
+       RB_HEAD(, dn_rbentry) dm_inotree;
+       TAILQ_HEAD(, dirfs_node) dm_fdlist;     /* List of dirfs nodes;
+                                                  presumably the passive fd
+                                                  cache linked through
+                                                  dn_fdentry - confirm */
+
+       struct lock             dm_lock;        /* Taken by dirfs_mount_lock() */
+       struct lwkt_token       dm_token;       /* Taken by
+                                                  dirfs_mount_gettoken() */
+       dirfs_node_t            dm_root;        /* Root dirfs node */
+       struct mount *          dm_mount;       /* VFS mount; returned by
+                                                  DIRFS_TO_VFS() */
+       int                     dm_rdonly;      /* Presumably non-zero for a
+                                                  read-only mount - confirm */
+
+       int                     dm_fd_used;     /* Opened file descriptors */
+
+       char                    dm_path[MAXPATHLEN];    /* Presumably the host
+                                                          directory path -
+                                                          confirm */
+};
+/* Pointer typedef used throughout dirfs to refer to a mount. */
+typedef struct dirfs_mount *dirfs_mount_t;
+
+/*
+ * VFS <-> DIRFS conversion macros
+ */
+/* struct mount -> dirfs mount, via the mount's mnt_data pointer. */
+#define VFS_TO_DIRFS(mp)       ((dirfs_mount_t)((mp)->mnt_data))
+/* dirfs mount -> struct mount, via the dm_mount back pointer. */
+#define DIRFS_TO_VFS(dmp)      ((struct mount *)((dmp)->dm_mount))
+/* vnode -> dirfs node, via the vnode's v_data pointer. */
+#define VP_TO_NODE(vp)         ((dirfs_node_t)((vp)->v_data))
+/* dirfs node -> vnode, via the dn_vnode pointer. */
+#define NODE_TO_VP(dnp)                ((dnp)->dn_vnode)
+
+/* Misc stuff */
+extern int debuglvl;
+extern int dirfs_fd_limit;
+extern int dirfs_fd_used;
+extern long passive_fd_list_miss;
+extern long passive_fd_list_hits;
+
+extern struct vop_ops dirfs_vnode_vops;
+
+/*
+ * Misc functions for node flags and reference count
+ */
+/*
+ * Take one reference on the node by atomically adding 1 to dn_refcnt.
+ */
+static __inline void
+dirfs_node_ref(dirfs_node_t dnp)
+{
+       atomic_add_int(&dnp->dn_refcnt, 1);
+}
+
+/*
+ * Drop one reference from the node.  Asserts that at least one reference
+ * is held, then atomically decrements dn_refcnt.
+ *
+ * Returns non-zero when the reference dropped was the last one.
+ */
+static __inline int
+dirfs_node_unref(dirfs_node_t dnp)
+{
+       int prev;
+
+       KKASSERT(dnp->dn_refcnt > 0);
+       prev = atomic_fetchadd_int(&dnp->dn_refcnt, -1);
+
+       /* The count was 1 before the decrement iff this was the last ref. */
+       return (prev == 1);
+}
+
+/*
+ * Atomically set the bits given in `flags' in the node's dn_state.
+ */
+static __inline void
+dirfs_node_setflags(dirfs_node_t dnp, int flags)
+{
+       atomic_set_int(&dnp->dn_state, flags);
+}
+
+/*
+ * Atomically clear the bits given in `flags' from the node's dn_state.
+ */
+static __inline void
+dirfs_node_clrflags(dirfs_node_t dnp, int flags)
+{
+       atomic_clear_int(&dnp->dn_state, flags);
+}
+
+
+/*
+ * Prototypes
+ */
+dirfs_node_t dirfs_node_alloc(struct mount *);
+int dirfs_node_stat(int, const char *, dirfs_node_t);
+int dirfs_nodetype(struct stat *);
+void dirfs_node_setname(dirfs_node_t, const char *, int);
+char *dirfs_node_fullpath(dirfs_mount_t, const char *);
+int dirfs_node_free(dirfs_mount_t, dirfs_node_t);
+void dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp);
+void dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state);
+void dirfs_alloc_vp(struct mount *, struct vnode **, int, dirfs_node_t);
+void dirfs_free_vp(dirfs_mount_t, dirfs_node_t);
+int dirfs_alloc_file(dirfs_mount_t, dirfs_node_t *, dirfs_node_t,
+    struct namecache *, struct vnode **, struct vattr *, int);
+dirfs_node_t dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur,
+                       char **pathto, char **pathfree);
+void dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree);
+char *dirfs_node_absolute_path(dirfs_mount_t, dirfs_node_t, char **);
+char *dirfs_node_absolute_path_plus(dirfs_mount_t, dirfs_node_t,
+                       char *, char **);
+int dirfs_open_helper(dirfs_mount_t, dirfs_node_t, int, char *);
+int dirfs_close_helper(dirfs_node_t);
+int dirfs_node_refcnt(dirfs_node_t);
+char *dirfs_flag2str(dirfs_node_t);
+int dirfs_node_getperms(dirfs_node_t, int *, int *, int *);
+int dirfs_node_chflags(dirfs_node_t, int, struct ucred *);
+int dirfs_node_chtimes(dirfs_node_t);
+int dirfs_node_chmod(dirfs_mount_t, dirfs_node_t, mode_t cur_mode);
+int dirfs_node_chown(dirfs_mount_t, dirfs_node_t,
+                       uid_t cur_uid, uid_t cur_gid, mode_t cur_mode);
+int dirfs_node_chsize(dirfs_node_t, off_t);
+void debug(int, const char *, ...);
+
+#endif /* _SYS_VFS_DIRFS_DIRFS_H_ */
diff --git a/sys/vfs/dirfs/dirfs_subr.c b/sys/vfs/dirfs/dirfs_subr.c
new file mode 100644 (file)
index 0000000..1a1d6d7
--- /dev/null
@@ -0,0 +1,891 @@
+/*
+ * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <sys/mount.h>
+#include <sys/queue.h>
+#include <sys/spinlock2.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/vfscache.h>
+#include <sys/vnode.h>
+
+#include "dirfs.h"
+
+/*
+ * Give the node a private copy of 'name' (len bytes), releasing any name
+ * it held before.
+ * Note: dn_name is always NUL-terminated; dn_namelen excludes the NUL.
+ */
+void
+dirfs_node_setname(dirfs_node_t dnp, const char *name, int len)
+{
+       char *copy;
+
+       debug_called();
+
+       if (dnp->dn_name != NULL)
+               kfree(dnp->dn_name, M_DIRFS_MISC);
+
+       copy = kmalloc(len + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+       bcopy(name, copy, len);
+       copy[len] = 0;
+
+       dnp->dn_name = copy;
+       dnp->dn_namelen = len;
+}
+
+/*
+ * Allocate a zero-filled dirfs node, initialize its lock and mark it as
+ * having no host file descriptor.
+ * Note: the node name and its length are not handled here.  The mp
+ * argument is currently unused.
+ */
+dirfs_node_t
+dirfs_node_alloc(struct mount *mp)
+{
+       dirfs_node_t node;
+
+       debug_called();
+
+       node = kmalloc(sizeof(*node), M_DIRFS_NODE, M_WAITOK | M_ZERO);
+       node->dn_fd = DIRFS_NOFD;
+       lockinit(&node->dn_lock, "dfsnode", 0, LK_CANRECURSE);
+
+       return (node);
+}
+
+/*
+ * Drop one reference to the node.  The node is freed when
+ * dirfs_node_unref() returns non-zero for it.
+ */
+void
+dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp)
+{
+       if (dirfs_node_unref(dnp) == 0)
+               return;
+
+       dirfs_node_free(dmp, dnp);
+}
+
+/*
+ * Tear down a node whose reference count is zero (asserted below).
+ *
+ * Removes the association with its parent. Before freeing up its resources
+ * the node will be removed from the per-mount passive fd cache and its fd
+ * will be closed, either normally or forced.
+ *
+ * The whole teardown runs with the mount token held.  Always returns 0.
+ */
+int
+dirfs_node_free(dirfs_mount_t dmp, dirfs_node_t dnp)
+{
+       struct vnode *vp;
+
+       debug_called();
+
+       KKASSERT(dnp != NULL);
+       debug_node2(dnp);
+
+       KKASSERT(dirfs_node_refcnt(dnp) == 0);
+
+       vp = NODE_TO_VP(dnp);
+       /*
+        * Remove the inode from the passive fds list
+        * as we are tearing down the node.
+        * Root inode will be removed on VOP_UNMOUNT()
+        */
+       dirfs_mount_gettoken(dmp);
+
+       /*
+        * Give back the reference this node held on its parent; via
+        * dirfs_node_drop() that may in turn free the parent.
+        */
+       if (dnp->dn_parent) {   /* NULL when children reaped parents */
+               dirfs_node_drop(dmp, dnp->dn_parent);
+               dnp->dn_parent = NULL;
+       }
+       dirfs_node_setpassive(dmp, dnp, 0);
+       if (dnp->dn_name) {
+               kfree(dnp->dn_name, M_DIRFS_MISC);
+               dnp->dn_name = NULL;
+       }
+
+       /*
+        * The file descriptor should have been closed already by the
+        * previous call to dirfs_node_setpassive(). If not, force a sync
+        * (only when a vnode is still attached) and close it.
+        */
+       if (dnp->dn_fd != DIRFS_NOFD) {
+               if (dnp->dn_vnode)
+                       VOP_FSYNC(vp, MNT_WAIT, 0);
+               close(dnp->dn_fd);
+               dnp->dn_fd = DIRFS_NOFD;
+       }
+
+       lockuninit(&dnp->dn_lock);
+       kfree(dnp, M_DIRFS_NODE);
+       dnp = NULL;     /* only clears the local copy of the pointer */
+
+       dirfs_mount_reltoken(dmp);
+
+       return 0;
+}
+
+/*
+ * Do all the operations needed to get a resulting inode <--> host file
+ * association. This may or may not include opening the file, which should
+ * only be needed when creating it.
+ *
+ * In the case vap is not NULL and openflags are specified, open the file.
+ *
+ * Returns 0 on success with *dnpp and *vpp set to the new node and its
+ * vnode.  On failure an errno value is returned, the freshly allocated
+ * node (including its reference on the parent) is released again and
+ * *dnpp / *vpp are left untouched.
+ */
+int
+dirfs_alloc_file(dirfs_mount_t dmp, dirfs_node_t *dnpp, dirfs_node_t pdnp,
+    struct namecache *ncp, struct vnode **vpp, struct vattr *vap,
+    int openflags)
+{
+       dirfs_node_t dnp;
+       dirfs_node_t pathnp;
+       struct vnode *vp;
+       struct mount *mp;
+       char *tmp;
+       char *pathfree;
+       int error;
+
+       debug_called();
+
+       vp = NULL;
+       mp = DIRFS_TO_VFS(dmp);
+
+       /* Sanity check */
+       if (pdnp == NULL)
+               return EINVAL;
+
+       dnp = dirfs_node_alloc(mp);
+       KKASSERT(dnp != NULL);
+
+       dirfs_node_lock(dnp);
+       dirfs_node_setname(dnp, ncp->nc_name, ncp->nc_nlen);
+       dnp->dn_parent = pdnp;
+       dirfs_node_ref(pdnp);   /* Children ref */
+       dirfs_node_unlock(dnp);
+
+       /*
+        * dirfs_findfd() returns NULL when the relative path does not fit
+        * in MAXPATHLEN.  There is nothing to drop in that case, but the
+        * new node (and its parent ref) must not be leaked.
+        */
+       pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+       if (pathnp == NULL) {
+               dirfs_node_free(dmp, dnp);
+               return ENAMETOOLONG;
+       }
+
+       if (openflags && vap != NULL) {
+               dnp->dn_fd = openat(pathnp->dn_fd, tmp,
+                                   openflags, vap->va_mode);
+               if (dnp->dn_fd == -1) {
+                       /* Save errno before the cleanup can clobber it. */
+                       error = errno;
+                       /* Nothing was opened; keep dirfs_node_free() from
+                        * trying to close a bogus descriptor. */
+                       dnp->dn_fd = DIRFS_NOFD;
+                       goto fail;
+               }
+       }
+
+       /* dirfs_node_stat() already hands back the errno of the failure. */
+       error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
+       if (error)
+               goto fail;
+
+       dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
+       *vpp = vp;
+       *dnpp = dnp;
+
+       dbg(5, "tmp=%s dnp=%p allocated\n", tmp, dnp);
+       dirfs_dropfd(dmp, pathnp, pathfree);
+
+       return 0;
+
+fail:
+       /*
+        * No vnode has been attached yet, so the node still has zero
+        * references and can be torn down directly.  This also closes a
+        * descriptor opened above and drops the parent ref.
+        */
+       dirfs_node_free(dmp, dnp);
+       dirfs_dropfd(dmp, pathnp, pathfree);
+       return error;
+}
+
+/*
+ * Return in *vpp a vnode for the given dirfs node: the one already
+ * attached to it when vget() on it succeeds, otherwise a newly
+ * allocated one that gets associated with the node.
+ *
+ * Requires a dirfs_node_t that has already been lstat(2)'ed, because
+ * the vnode type is taken from dnp->dn_type and an unexpected type
+ * panics.
+ */
+void
+dirfs_alloc_vp(struct mount *mp, struct vnode **vpp, int lkflags,
+              dirfs_node_t dnp)
+{
+       struct vnode *vp;
+       dirfs_mount_t dmp = VFS_TO_DIRFS(mp);
+
+       debug_called();
+
+       /*
+        * Handle vnode reclaim/alloc races
+        */
+       for (;;) {
+               vp = dnp->dn_vnode;
+               if (vp) {
+                       if (vget(vp, LK_EXCLUSIVE) == 0)
+                               break;  /* success */
+                       /* vget raced a reclaim, retry */
+               } else {
+                       getnewvnode(VT_UNUSED10, mp, &vp, 0, lkflags);
+                       /* Re-check: dn_vnode may have been set meanwhile. */
+                       if (dnp->dn_vnode == NULL) {
+                               dnp->dn_vnode = vp;
+                               vp->v_data = dnp;
+                               vp->v_type = dnp->dn_type;
+                               if (dmp->dm_root == dnp)
+                                       vsetflags(vp, VROOT);
+                               dirfs_node_ref(dnp);    /* ref for dnp<->vp */
+
+                               /* Type-specific initialization. */
+                               switch (dnp->dn_type) {
+                               case VBLK:
+                               case VCHR:
+                               case VSOCK:
+                                       break;
+                               case VREG:
+                                       vinitvmio(vp, dnp->dn_size, BMASK, -1);
+                                       break;
+                               case VLNK:
+                                       break;
+                               case VFIFO:
+                       //              vp->v_ops = &mp->mnt_vn_fifo_ops;
+                                       break;
+                               case VDIR:
+                                       break;
+                               default:
+                                       panic("dirfs_alloc_vp: dnp=%p vp=%p "
+                                             "type=%d",
+                                             dnp, vp, dnp->dn_type);
+                                       /* NOT REACHED */
+                                       break;
+                               }
+                               break;  /* success */
+                       }
+                       /* Lost the race: discard the vnode we just got. */
+                       vp->v_type = VBAD;
+                       vx_put(vp);
+                       /* multiple dirfs_alloc_vp calls raced, retry */
+               }
+       }
+       KKASSERT(vp != NULL);
+       *vpp = vp;
+       dbg(5, "dnp=%p vp=%p type=%d\n", dnp, vp, vp->v_type);
+}
+
+/*
+ * Break the dnp<->vp association in both directions and drop the node
+ * reference that dirfs_alloc_vp() took for it.
+ *
+ * Do not call locked!
+ */
+void
+dirfs_free_vp(dirfs_mount_t dmp, dirfs_node_t dnp)
+{
+       struct vnode *node_vp;
+
+       node_vp = NODE_TO_VP(dnp);
+       node_vp->v_data = NULL;
+       dnp->dn_vnode = NULL;
+
+       dirfs_node_drop(dmp, dnp);
+}
+
+/*
+ * Translate the file type encoded in st->st_mode into the matching
+ * vnode type.  VBAD is returned for a type none of the S_IS*() tests
+ * recognize.
+ */
+int
+dirfs_nodetype(struct stat *st)
+{
+       const mode_t m = st->st_mode;
+
+       debug_called();
+
+       if (S_ISDIR(m))
+               return (VDIR);
+       if (S_ISBLK(m))
+               return (VBLK);
+       if (S_ISCHR(m))
+               return (VCHR);
+       if (S_ISFIFO(m))
+               return (VFIFO);
+       if (S_ISSOCK(m))
+               return (VSOCK);
+       if (S_ISLNK(m))
+               return (VLNK);
+       if (S_ISREG(m))
+               return (VREG);
+
+       return (VBAD);
+}
+
+/*
+ * Refresh the attributes cached in a dirfs node from the host file.
+ *
+ * When fd is DIRFS_NOFD the path is lstat(2)'ed directly, otherwise it
+ * is looked up relative to fd with fstatat(2) and AT_SYMLINK_NOFOLLOW.
+ *
+ * Returns 0 on success or the errno of the failed stat call, in which
+ * case the node is left untouched.
+ */
+int
+dirfs_node_stat(int fd, const char *path, dirfs_node_t dnp)
+{
+       struct stat st;
+       int error;
+
+       debug_called();
+       if (fd == DIRFS_NOFD)
+               error = lstat(path, &st);
+       else
+               error = fstatat(fd, path, &st, AT_SYMLINK_NOFOLLOW);
+
+       if (error)
+               return errno;
+
+       /*
+        * Populate our dirfs node struct with stat data.
+        *
+        * The *nsec fields carry the sub-second part of each timestamp,
+        * i.e. the tv_nsec member of the BSD st_*timespec fields.  They
+        * used to be set to "seconds * 1000000000", which is neither a
+        * valid nanosecond fraction nor anything the host reported.
+        * NOTE(review): presumably consumed as va_*time.tv_nsec by the
+        * getattr path -- confirm against dirfs_vnops.c.
+        */
+       dnp->dn_uid = st.st_uid;
+       dnp->dn_gid = st.st_gid;
+       dnp->dn_mode = st.st_mode;
+       dnp->dn_flags = st.st_flags;
+       dnp->dn_links = st.st_nlink;
+       dnp->dn_atime = st.st_atime;
+       dnp->dn_atimensec = st.st_atimespec.tv_nsec;
+       dnp->dn_mtime = st.st_mtime;
+       dnp->dn_mtimensec = st.st_mtimespec.tv_nsec;
+       dnp->dn_ctime = st.st_ctime;
+       dnp->dn_ctimensec = st.st_ctimespec.tv_nsec;
+       dnp->dn_gen = st.st_gen;
+       dnp->dn_ino = st.st_ino;
+       dnp->dn_st_dev = st.st_dev;
+       dnp->dn_size = st.st_size;
+       dnp->dn_type = dirfs_nodetype(&st);
+
+       return 0;
+}
+
+/*
+ * Convenience wrapper around dirfs_node_absolute_path_plus() for callers
+ * that have no trailing element to append.
+ */
+char *
+dirfs_node_absolute_path(dirfs_mount_t dmp, dirfs_node_t cur, char **pathfreep)
+{
+       char *path;
+
+       path = dirfs_node_absolute_path_plus(dmp, cur, NULL, pathfreep);
+       return (path);
+}
+
+/*
+ * Build the absolute host path of node 'cur', optionally followed by
+ * "/last" when 'last' is not NULL.
+ *
+ * The string is assembled backwards, from the end of a kmalloc'ed
+ * buffer of MAXPATHLEN + 1 bytes towards its start: first the optional
+ * trailing element, then every node name up to the root, and finally
+ * the mount's dm_path as prefix.
+ *
+ * On success the returned pointer points into that buffer and
+ * *pathfreep holds the buffer itself, which the caller has to release
+ * with kfree(..., M_DIRFS_MISC), e.g. through dirfs_dropfd().
+ * NULL is returned (with *pathfreep set to NULL) when cur is NULL, when
+ * the parent chain ends before reaching the root, or when the result
+ * does not fit in MAXPATHLEN.
+ */
+char *
+dirfs_node_absolute_path_plus(dirfs_mount_t dmp, dirfs_node_t cur,
+                             char *last, char **pathfreep)
+{
+       size_t len;
+       dirfs_node_t dnp1;
+       char *buf;
+       int count;
+
+       debug_called();
+
+       KKASSERT(dmp->dm_root); /* Sanity check */
+       *pathfreep = NULL;
+       if (cur == NULL)
+               return NULL;
+       buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK);
+
+       /*
+        * Passed-in trailing element.
+        *
+        * count is the number of bytes used so far at the end of buf.
+        * It keeps growing even when a piece no longer fits (the copy is
+        * then skipped) so that the final length check fails.
+        */
+       count = 0;
+       buf[MAXPATHLEN] = 0;
+       if (last) {
+               len = strlen(last);
+               count += len;
+               if (count <= MAXPATHLEN)
+                       bcopy(last, &buf[MAXPATHLEN - count], len);
+               ++count;
+               if (count <= MAXPATHLEN)
+                       buf[MAXPATHLEN - count] = '/';
+       }
+
+       /*
+        * Iterate through the parents until we hit the root.
+        */
+       dnp1 = cur;
+       while (dirfs_node_isroot(dnp1) == 0) {
+               count += dnp1->dn_namelen;
+               if (count <= MAXPATHLEN) {
+                       bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
+                             dnp1->dn_namelen);
+               }
+               ++count;
+               if (count <= MAXPATHLEN)
+                       buf[MAXPATHLEN - count] = '/';
+               dnp1 = dnp1->dn_parent;
+               if (dnp1 == NULL)
+                       break;
+       }
+
+       /*
+        * Prefix with the root mount path.  If the element was unlinked
+        * dnp1 will be NULL and there is no path.
+        */
+       len = strlen(dmp->dm_path);
+       count += len;
+       if (dnp1 && count <= MAXPATHLEN) {
+               bcopy(dmp->dm_path, &buf[MAXPATHLEN - count], len);
+               *pathfreep = buf;
+               dbg(5, "absolute_path %s\n", &buf[MAXPATHLEN - count]);
+               return (&buf[MAXPATHLEN - count]);
+       } else {
+               kfree(buf, M_DIRFS_MISC);
+               *pathfreep = NULL;
+               return (NULL);
+       }
+}
+
+/*
+ * Return a dirfs_node with a valid descriptor plus an allocated
+ * relative path which can be used in openat(), fstatat(), etc calls
+ * to locate the requested inode.
+ *
+ * Starting at 'cur', names are collected while walking up the parent
+ * chain until an ancestor with an open descriptor is found; 'cur' itself
+ * always contributes its name.  The returned node is referenced, *pathto
+ * points at the path relative to it and *pathfreep at the buffer that
+ * holds the path.  Both must be given back with dirfs_dropfd().
+ *
+ * NULL is returned (with *pathto and *pathfreep set to NULL) when cur is
+ * NULL or when the relative path does not fit in MAXPATHLEN.
+ */
+dirfs_node_t
+dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur,
+            char **pathto, char **pathfreep)
+{
+       dirfs_node_t dnp1;
+       int count;
+       char *buf;
+
+       debug_called();
+
+       *pathfreep = NULL;
+       *pathto = NULL;
+
+       if (cur == NULL)
+               return NULL;
+
+       /* M_ZERO provides the terminating NUL at buf[MAXPATHLEN]. */
+       buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+       count = 0;
+
+       /*
+        * The path is built backwards from the end of buf.  count keeps
+        * growing even when a name no longer fits (the copy is skipped)
+        * so that the length check below fails.
+        */
+       dnp1 = cur;
+       while (dnp1 == cur || dnp1->dn_fd == DIRFS_NOFD) {
+               count += dnp1->dn_namelen;
+               if (count <= MAXPATHLEN) {
+                       bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count],
+                             dnp1->dn_namelen);
+               }
+               ++count;
+               if (count <= MAXPATHLEN)
+                       buf[MAXPATHLEN - count] = '/';
+               dnp1 = dnp1->dn_parent;
+               KKASSERT(dnp1 != NULL);
+       }
+
+       if (dnp1 && count <= MAXPATHLEN) {
+               *pathfreep = buf;
+               *pathto = &buf[MAXPATHLEN - count + 1]; /* skip '/' prefix */
+               dirfs_node_ref(dnp1);
+               /* dn_name is a string; it used to be formatted with %d. */
+               dbg(5, "fd=%d dnp1=%p dnp1->dn_name=%s &buf[off]=%s\n",
+                   dnp1->dn_fd, dnp1, dnp1->dn_name, *pathto);
+       } else {
+               dbg(5, "failed too long\n");
+               kfree(buf, M_DIRFS_MISC);
+               *pathfreep = NULL;
+               *pathto = NULL;
+               dnp1 = NULL;
+       }
+       return (dnp1);
+}
+
+/*
+ * Release a path buffer and/or a node reference, as handed out by
+ * dirfs_findfd().  Either of dnp1 and pathfree may be NULL, in which
+ * case that part is skipped.
+ */
+void
+dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree)
+{
+       if (pathfree != NULL)
+               kfree(pathfree, M_DIRFS_MISC);
+
+       if (dnp1 == NULL)
+               return;
+
+       dirfs_node_drop(dmp, dnp1);
+}
+
+/*
+ * Test the permission bit of the one class the caller falls into:
+ * owner bits for the owner, else group bits for a group member, else
+ * the "other" bits.  Returns 1 when the bit is set in mode, 0 otherwise.
+ */
+static int
+dirfs_perm_allowed(mode_t mode, int isowner, int isgroup,
+                  mode_t ubit, mode_t gbit, mode_t obit)
+{
+       mode_t bit;
+
+       if (isowner)
+               bit = ubit;
+       else if (isgroup)
+               bit = gbit;
+       else
+               bit = obit;
+
+       return ((mode & bit) != 0);
+}
+
+/*
+ * Work out whether the user running the vkernel may read, write and
+ * execute the host file behind dnp, based on the cached uid/gid/mode.
+ *
+ * Each of r, w and x may be NULL; the ones that are not receive 1 or 0.
+ * Exactly one permission class is consulted, the way the host does it.
+ * Previously a permission missing from the owner (or group) class was
+ * still reported as granted when a less specific class had it, and the
+ * outputs were left unwritten when access was not granted.
+ *
+ * NOTE(review): only the real uid and the primary gid are compared;
+ * supplementary groups and superuser privileges are not considered.
+ *
+ * Always returns 0.
+ */
+int
+dirfs_node_getperms(dirfs_node_t dnp, int *r, int *w, int *x)
+{
+       uid_t u;
+       gid_t g;
+       int isowner, isgroup;
+
+       u = getuid();   /* XXX What about EUID? */
+       g = getgid();   /* XXX What about EGID? */
+       isowner = (u == dnp->dn_uid);
+       isgroup = (g == dnp->dn_gid);
+
+       if (r) {
+               *r = dirfs_perm_allowed(dnp->dn_mode, isowner, isgroup,
+                                       S_IRUSR, S_IRGRP, S_IROTH);
+       }
+
+       if (w) {
+               *w = dirfs_perm_allowed(dnp->dn_mode, isowner, isgroup,
+                                       S_IWUSR, S_IWGRP, S_IWOTH);
+       }
+
+       if (x) {
+               *x = dirfs_perm_allowed(dnp->dn_mode, isowner, isgroup,
+                                       S_IXUSR, S_IXGRP, S_IXOTH);
+       }
+
+       return 0;
+}
+
+/*
+ * Open the host file behind dnp and store the descriptor in dnp->dn_fd.
+ *
+ * The file is opened with openat(2) relative to a directory descriptor:
+ *  - relpath != NULL: relative to parentfd, which must be valid;
+ *  - parentfd == DIRFS_NOFD: dirfs_findfd() locates the closest ancestor
+ *    with an open descriptor and the path relative to it.
+ * Directories are opened with O_DIRECTORY, everything else O_RDWR when
+ * dirfs_node_getperms() reports write access and O_RDONLY otherwise.
+ *
+ * Returns 0 on success or an errno value; on failure dn_fd is set to
+ * DIRFS_NOFD.
+ *
+ * This requires an allocated node and vnode, otherwise it'll panic
+ */
+int
+dirfs_open_helper(dirfs_mount_t dmp, dirfs_node_t dnp, int parentfd,
+                 char *relpath)
+{
+       int canread, canwrite, canexec;
+       dirfs_node_t pathnp;
+       char *tmp;
+       char *pathfree;
+       int flags, error;
+
+       debug_called();
+
+       canread = canwrite = canexec = 0;
+       flags = error = 0;
+       tmp = NULL;
+       pathnp = NULL;
+       pathfree = NULL;
+
+       KKASSERT(dnp);
+       KKASSERT(dnp->dn_vnode);
+
+       /*
+        * XXX Besides VDIR and VREG there are other file
+        * types, y'know?
+        * Also, O_RDWR alone might not be the best mode to open
+        * a file with, need to investigate which suits better.
+        */
+       dirfs_node_getperms(dnp, &canread, &canwrite, &canexec);
+
+       /*
+        * dn_type holds a vnode type value, not a bit mask, so it has to
+        * be compared for equality; "& VDIR" also matched other types.
+        */
+       if (dnp->dn_type == VDIR) {
+               flags |= O_DIRECTORY;
+       } else {
+               if (canwrite)
+                       flags |= O_RDWR;
+               else
+                       flags |= O_RDONLY;
+       }
+       if (relpath != NULL) {
+               tmp = relpath;
+               KKASSERT(parentfd != DIRFS_NOFD);
+       } else if (parentfd == DIRFS_NOFD) {
+               pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+               /* NULL means the relative path exceeds MAXPATHLEN. */
+               if (pathnp == NULL)
+                       return ENAMETOOLONG;
+               parentfd = pathnp->dn_fd;
+       }
+       /*
+        * NOTE(review): with a valid parentfd but no relpath, tmp stays
+        * NULL and openat() is handed a NULL path -- confirm whether any
+        * caller relies on this combination.
+        */
+
+       dnp->dn_fd = openat(parentfd, tmp, flags);
+       if (dnp->dn_fd == -1) {
+               error = errno;
+               /* Keep the "no descriptor" sentinel used everywhere else. */
+               dnp->dn_fd = DIRFS_NOFD;
+       }
+
+       dbg(5, "dnp=%p tmp2=%s parentfd=%d flags=%d error=%d "
+           "r=%d w=%d x=%d\n", dnp, tmp, parentfd, flags, error,
+           canread, canwrite, canexec);
+
+       if (pathnp)
+               dirfs_dropfd(dmp, pathnp, pathfree);
+
+       return error;
+}
+
+/*
+ * Counterpart of dirfs_open_helper().
+ *
+ * The actual close(2) is compiled out (#if 0) because buffer cache
+ * buffers may still need the descriptor, so at present this only emits
+ * a debug message when the node has a descriptor, leaves dn_fd alone
+ * and always returns 0.
+ */
+int
+dirfs_close_helper(dirfs_node_t dnp)
+{
+       int error = 0;
+
+       debug_called();
+
+
+       if (dnp->dn_fd != DIRFS_NOFD) {
+               dbg(5, "closed fd on dnp=%p\n", dnp);
+#if 0
+               /* buffer cache buffers may still be present */
+               error = close(dnp->dn_fd); /* XXX EINTR should be checked */
+               dnp->dn_fd = DIRFS_NOFD;
+#endif
+       }
+
+       return error;
+}
+
+/*
+ * Return the node's current reference count (dn_refcnt).
+ */
+int
+dirfs_node_refcnt(dirfs_node_t dnp)
+{
+       int refs;
+
+       refs = dnp->dn_refcnt;
+       return (refs);
+}
+
+/*
+ * Set the access and modification times of the host file to the current
+ * time (lutimes(2) with a NULL times argument) and refresh the cached
+ * attributes of the node.
+ *
+ * The vnode must be locked.  Returns EPERM when the node carries the
+ * IMMUTABLE or APPEND flag, the errno of a failed lutimes(2), or 0.
+ */
+int
+dirfs_node_chtimes(dirfs_node_t dnp)
+{
+       struct vnode *vp;
+       dirfs_mount_t dmp;
+       char *tmp;
+       char *freeme;
+       int error;
+
+       debug_called();
+
+       vp = NODE_TO_VP(dnp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       KKASSERT(vn_islocked(vp));
+
+       if ((dnp->dn_flags & (IMMUTABLE | APPEND)) != 0)
+               return (EPERM);
+
+       tmp = dirfs_node_absolute_path(dmp, dnp, &freeme);
+       KKASSERT(tmp);
+
+       error = (lutimes(tmp, NULL) == -1) ? errno : 0;
+
+       /* Re-read the attributes whether or not the update worked. */
+       dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+       dirfs_dropfd(dmp, NULL, freeme);
+
+       KKASSERT(vn_islocked(vp));
+
+       return (error);
+}
+
+/*
+ * Change the file flags of the host file behind dnp.
+ *
+ * vop_helper_setattr_flags() computes the new flag word from the cached
+ * one and vaflags; only when it succeeds is lchflags(2) attempted and
+ * the node's cached attributes re-read.  The vnode must be locked.
+ *
+ * Returns the helper's error, the errno of a failed lchflags(2), or 0.
+ */
+int
+dirfs_node_chflags(dirfs_node_t dnp, int vaflags, struct ucred *cred)
+{
+       struct vnode *vp;
+       dirfs_mount_t dmp;
+       char *tmp;
+       char *freeme;
+       int newflags;
+       int error;
+
+       debug_called();
+
+       vp = NODE_TO_VP(dnp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       KKASSERT(vn_islocked(vp));
+
+       newflags = dnp->dn_flags;
+       error = vop_helper_setattr_flags(&newflags, vaflags,
+                                        dnp->dn_uid, cred);
+       if (error != 0)
+               goto done;
+
+       /*
+        * When running vkernels with non-root it is not possible to set
+        * certain flags on host files, such as SF* flags. chflags(2) call
+        * will spit an error in that case.
+        */
+       tmp = dirfs_node_absolute_path(dmp, dnp, &freeme);
+       KKASSERT(tmp);
+       if (lchflags(tmp, newflags) == -1)
+               error = errno;
+       dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+       dirfs_dropfd(dmp, NULL, freeme);
+
+done:
+       KKASSERT(vn_islocked(vp));
+
+       return (error);
+}
+
+/*
+ * Change the mode of the host file behind dnp with lchmod(2) and re-read
+ * the node's cached attributes afterwards.
+ *
+ * Returns 0 or the errno of the failed lchmod(2).
+ */
+int
+dirfs_node_chmod(dirfs_mount_t dmp, dirfs_node_t dnp, mode_t mode)
+{
+       char *tmp;
+       char *freeme;
+       int error;
+
+       tmp = dirfs_node_absolute_path(dmp, dnp, &freeme);
+       KKASSERT(tmp);
+
+       error = (lchmod(tmp, mode) < 0) ? errno : 0;
+
+       dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+       dirfs_dropfd(dmp, NULL, freeme);
+
+       return (error);
+}
+
+/*
+ * Change owner and group of the host file behind dnp with lchown(2).
+ * When 'mode' differs from the cached dn_mode an lchmod(2) is issued as
+ * well; its result is not checked.  The node's cached attributes are
+ * re-read afterwards.
+ *
+ * Returns 0 or the errno of the failed lchown(2).
+ */
+int
+dirfs_node_chown(dirfs_mount_t dmp, dirfs_node_t dnp,
+                uid_t uid, uid_t gid, mode_t mode)
+{
+       char *tmp;
+       char *freeme;
+       int error;
+
+       tmp = dirfs_node_absolute_path(dmp, dnp, &freeme);
+       KKASSERT(tmp);
+
+       error = (lchown(tmp, uid, gid) < 0) ? errno : 0;
+
+       if (dnp->dn_mode != mode)
+               lchmod(tmp, mode);
+
+       dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+       dirfs_dropfd(dmp, NULL, freeme);
+
+       return (error);
+}
+
+
+/*
+ * Change the size of a regular file to nsize.
+ *
+ * The buffer cache is adjusted first (nvtruncbuf() when shrinking,
+ * nvextendbuf() otherwise); only when that succeeds is the host file
+ * truncate(2)'d, and only when both succeed is the cached dn_size
+ * updated.  The vnode must be locked.
+ *
+ * Returns EISDIR for directories, EOPNOTSUPP for any other non-regular
+ * type, otherwise the buffer cache error, the errno of a failed
+ * truncate(2), or 0.
+ *
+ * NOTE(review): unlike its siblings this function does not check the
+ * absolute path for NULL; truncate(2) is then called with a NULL path
+ * after the buffer cache was already modified -- confirm the intended
+ * handling.
+ */
+int
+dirfs_node_chsize(dirfs_node_t dnp, off_t nsize)
+{
+       dirfs_mount_t dmp;
+       struct vnode *vp;
+       int error = 0;
+       char *tmp;
+       char *pathfree;
+       off_t osize;
+       int biosize;
+
+       debug_called();
+
+       KKASSERT(dnp);
+
+       vp = NODE_TO_VP(dnp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+       biosize = BSIZE;
+       osize = dnp->dn_size;
+
+       KKASSERT(vn_islocked(vp));
+
+       switch (vp->v_type) {
+       case VDIR:
+               return (EISDIR);
+       case VREG:
+               break;
+       default:
+               return (EOPNOTSUPP);
+
+       }
+
+       tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
+       if (nsize < osize) {
+               error = nvtruncbuf(vp, nsize, biosize, -1, 0);
+       } else {
+               error = nvextendbuf(vp, osize, nsize,
+                                   biosize, biosize,
+                                   -1, -1, 0);
+       }
+       if (error == 0 && truncate(tmp, nsize) < 0)
+               error = errno;
+       if (error == 0)
+               dnp->dn_size = nsize;
+       /* Both %jx conversions need an intmax_t sized argument. */
+       dbg(5, "TRUNCATE %016jx %016jx\n",
+           (intmax_t)nsize, (intmax_t)dnp->dn_size);
+       /*dirfs_node_stat(DIRFS_NOFD, tmp, dnp); don't need to do this*/
+
+       dirfs_dropfd(dmp, NULL, pathfree);
+
+
+       KKASSERT(vn_islocked(vp));
+
+       return error;
+}
+
+/*
+ * Put a node on (state != 0) or take it off (state == 0) the per-mount
+ * passive fd list, dm_fdlist.
+ *
+ * Entering the list only happens for nodes that are not yet flagged
+ * DIRFS_PASVFD and that have an open descriptor.  It takes a node
+ * reference, sets the flag and bumps both the global and the per-mount
+ * counters; if the mount is then over dirfs_fd_limit, entries are
+ * removed from the head of the list until it is not.
+ *
+ * Leaving the list clears the flag, undoes the accounting, closes the
+ * descriptor when the conditions described below hold, and finally
+ * drops the reference taken when the node entered the list.
+ */
+void
+dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state)
+{
+       struct vnode *vp;
+
+       if (state && (dnp->dn_state & DIRFS_PASVFD) == 0 &&
+           dnp->dn_fd != DIRFS_NOFD) {
+               dirfs_node_ref(dnp);
+               dirfs_node_setflags(dnp, DIRFS_PASVFD);
+               TAILQ_INSERT_TAIL(&dmp->dm_fdlist, dnp, dn_fdentry);
+               ++dirfs_fd_used;
+               ++dmp->dm_fd_used;
+
+               /*
+                * If we are over our limit remove nodes from the
+                * passive fd cache.
+                *
+                * Note that dnp is reused as the loop cursor here; this
+                * is harmless because state is non-zero, so the removal
+                * branch below is not entered for this call.
+                */
+               while (dmp->dm_fd_used > dirfs_fd_limit) {
+                       dnp = TAILQ_FIRST(&dmp->dm_fdlist);
+                       dirfs_node_setpassive(dmp, dnp, 0);
+               }
+       }
+       if (state == 0 && (dnp->dn_state & DIRFS_PASVFD)) {
+               dirfs_node_clrflags(dnp, DIRFS_PASVFD);
+               TAILQ_REMOVE(&dmp->dm_fdlist, dnp, dn_fdentry);
+               --dirfs_fd_used;
+               --dmp->dm_fd_used;
+               dbg(5, "dnp=%p removed from fdlist. %d used\n",
+                   dnp, dirfs_fd_used);
+
+               /*
+                * Attempt to close the descriptor.  We can only do this
+                * if the related vnode is inactive and has exactly two
+                * refs (representing the vp<->dnp and PASVFD).  Otherwise
+                * someone might have ref'd the node in order to use the
+                * dn_fd.
+                *
+                * Also, if the vnode is in any way dirty we leave the fd
+                * open for the buffer cache code.  The syncer will eventually
+                * come along and fsync the vnode, and the next inactive
+                * transition will deal with the descriptor.
+                *
+                * The descriptor for the root node is NEVER closed by
+                * this function.
+                */
+               vp = dnp->dn_vnode;
+               if (dirfs_node_refcnt(dnp) == 2 && vp &&
+                   dnp->dn_fd != DIRFS_NOFD &&
+                   !dirfs_node_isroot(dnp) &&
+                   (vp->v_flag & (VINACTIVE|VOBJDIRTY)) == VINACTIVE &&
+                   RB_EMPTY(&vp->v_rbdirty_tree)) {
+                       dbg(5, "passive cache: closing %d\n", dnp->dn_fd);
+                       close(dnp->dn_fd);
+                       dnp->dn_fd = DIRFS_NOFD;
+               } else {
+                       /*
+                        * No vnode attached at all: the PASVFD reference
+                        * is the only one left, so the descriptor can be
+                        * closed as well (again, never for the root).
+                        */
+                       if (dirfs_node_refcnt(dnp) == 1 && dnp->dn_vnode == NULL &&
+                           dnp->dn_fd != DIRFS_NOFD &&
+                           dnp != dmp->dm_root) {
+                               dbg(5, "passive cache: closing %d\n", dnp->dn_fd);
+                               close(dnp->dn_fd);
+                               dnp->dn_fd = DIRFS_NOFD;
+                       }
+               }
+               /* Drop the reference taken when entering the list. */
+               dirfs_node_drop(dmp, dnp);
+       }
+}
+
+/*
+ * Return a textual representation of the node's state flags, for debug
+ * output.  Only DIRFS_PASVFD is rendered; a node without it yields an
+ * empty string.
+ *
+ * The result lives in a static buffer that is overwritten by the next
+ * call.
+ */
+char *
+dirfs_flag2str(dirfs_node_t dnp)
+{
+       const char *txtflg[] = { DIRFS_TXTFLG };
+       static char str[512] = {0};
+
+       /*
+        * Start from an empty string on every call.  The buffer is static,
+        * so without this a node with no flags set would report whatever
+        * an earlier call left behind.
+        */
+       str[0] = '\0';
+
+       if (dnp->dn_state & DIRFS_PASVFD)
+               ksprintf(str, "%s ", txtflg[0]);
+
+       return str;
+}
+
+/*
+ * printf-style debug output: the message is passed to kvprintf() only
+ * when the global debuglvl is at least 'level'.
+ */
+void
+debug(int level, const char *fmt, ...)
+{
+       __va_list args;
+
+       if (debuglvl < level)
+               return;
+
+       __va_start(args, fmt);
+       kvprintf(fmt, args);
+       __va_end(args);
+}
+
diff --git a/sys/vfs/dirfs/dirfs_vfsops.c b/sys/vfs/dirfs/dirfs_vfsops.c
new file mode 100644 (file)
index 0000000..a09c1de
--- /dev/null
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/vfsops.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <sys/spinlock2.h>
+#include <sys/sysref2.h>
+#include <sys/ktr.h>
+
+#include <string.h>
+
+#include "dirfs.h"
+
+MALLOC_DEFINE(M_DIRFS, "dirfs", "dirfs mount allocation");
+MALLOC_DEFINE(M_DIRFS_NODE, "dirfs nodes", "dirfs nodes memory allocation");
+MALLOC_DEFINE(M_DIRFS_MISC, "dirfs misc", "dirfs miscellaneous allocation");
+
+/*
+ * Kernel tracing facilities
+ */
+KTR_INFO_MASTER(dirfs);
+
+KTR_INFO(KTR_DIRFS, dirfs, root, 31,
+    "DIRFS(root dnp=%p vnode=%p hostdir=%s fd=%d error=%d)",
+    dirfs_node_t dnp, struct vnode *vp, char *hostdir, int fd, int error);
+
+/*
+ * System wide sysctl stuff.
+ *
+ * These globals are exported under the vfs.dirfs sysctl node declared below.
+ */
+int debuglvl = 2;
+int dirfs_fd_limit = 100;
+int dirfs_fd_used = 0;
+long passive_fd_list_miss = 0;
+long passive_fd_list_hits = 0;
+
+SYSCTL_NODE(_vfs, OID_AUTO, dirfs, CTLFLAG_RW, 0,
+    "dirfs filesystem for vkernels");
+SYSCTL_INT(_vfs_dirfs, OID_AUTO, debug, CTLFLAG_RW,
+    &debuglvl, 0, "dirfs debug level");
+SYSCTL_INT(_vfs_dirfs, OID_AUTO, fd_limit, CTLFLAG_RW,
+    &dirfs_fd_limit, 0, "Maximum number of passive nodes to cache");
+SYSCTL_INT(_vfs_dirfs, OID_AUTO, fd_used, CTLFLAG_RD,
+    &dirfs_fd_used, 0, "Current number of passive nodes cached");
+SYSCTL_LONG(_vfs_dirfs, OID_AUTO, passive_fd_list_miss, CTLFLAG_RD,
+    &passive_fd_list_miss, 0, "Passive fd list cache misses");
+SYSCTL_LONG(_vfs_dirfs, OID_AUTO, passive_fd_list_hits, CTLFLAG_RD,
+    &passive_fd_list_hits, 0, "Passive fd list cache hits");
+
+static int dirfs_statfs(struct mount *, struct statfs *, struct ucred *);
+
+/*
+ * Mount a host directory, or update the flags of an existing dirfs mount.
+ *
+ * mp   - mount point being set up (or updated when MNT_UPDATE is set).
+ * path - not used by this function.
+ * data - host directory path; copied with copyinstr() first and, if that
+ *        fails, with copystr().
+ * cred - credentials, only forwarded to dirfs_statfs().
+ *
+ * Returns 0 on success or an errno value.  On every failure after the
+ * dirfs_mount structure has been allocated, the structure is released and
+ * mp->mnt_data is cleared so no dangling pointer is left behind.
+ */
+static int
+dirfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
+{
+       dirfs_mount_t dmp;
+       struct stat st;
+       size_t done, nlen;
+       int error;
+
+       debug_called();
+
+       if (mp->mnt_flag & MNT_UPDATE) {
+               dmp = VFS_TO_DIRFS(mp);
+               if (dmp->dm_rdonly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+                       /* XXX We should make sure all writes are synced */
+                       dmp->dm_rdonly = 1;
+                       debug(2, "dirfs read-write -> read-only\n");
+               }
+
+               if (dmp->dm_rdonly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+                       debug(2, "dirfs read-only -> read-write\n");
+                       dmp->dm_rdonly = 0;
+               }
+               return 0;
+       }
+
+       dmp = kmalloc(sizeof(*dmp), M_DIRFS, M_WAITOK | M_ZERO);
+       mp->mnt_data = (qaddr_t)dmp;
+       dmp->dm_mount = mp;
+
+       error = copyinstr(data, &dmp->dm_path, MAXPATHLEN, &done);
+       if (error) {
+               /* Attempt to copy from kernel address */
+               error = copystr(data, &dmp->dm_path, MAXPATHLEN, &done);
+               if (error)
+                       goto fail;
+       }
+
+       /*
+        * Strip / character at the end to avoid problems.  The length
+        * check keeps us from indexing dm_path[-1] on an empty path and
+        * from turning a lone "/" into an empty string.
+        */
+       nlen = strnlen(dmp->dm_path, MAXPATHLEN);
+       if (nlen > 1 && dmp->dm_path[nlen-1] == '/')
+               dmp->dm_path[nlen-1] = 0;
+
+       /* Make sure host directory exists and it is indeed a directory. */
+       if (stat(dmp->dm_path, &st) != 0) {
+               /* Grab errno before any other call can overwrite it */
+               error = errno;
+               goto fail;
+       }
+       if (!S_ISDIR(st.st_mode)) {
+               error = EINVAL;
+               goto fail;
+       }
+
+       lockinit(&dmp->dm_lock, "dfsmnt", 0, LK_CANRECURSE);
+
+       vfs_add_vnodeops(mp, &dirfs_vnode_vops, &mp->mnt_vn_norm_ops);
+       vfs_getnewfsid(mp);
+
+       TAILQ_INIT(&dmp->dm_fdlist);
+       RB_INIT(&dmp->dm_inotree);
+
+       kmalloc_raise_limit(M_DIRFS_NODE, 0);
+
+       /* Best effort: a statfs failure does not abort the mount */
+       dirfs_statfs(mp, &mp->mnt_stat, cred);
+
+       dbg(5, "%s mounted. dmp=%p mp=%p\n", dmp->dm_path, dmp, mp);
+
+       return 0;
+
+fail:
+       /* Common error exit: undo the allocation made above */
+       mp->mnt_data = (qaddr_t) 0;
+       kfree(dmp, M_DIRFS);
+       return error;
+}
+
+/*
+ * Unmount a dirfs filesystem.
+ *
+ * Flushes the vnodes of the mount, empties the passive fd list, releases
+ * the root node (if one was ever created) and frees the dirfs_mount
+ * structure.  mntflags is currently not consulted.
+ *
+ * Returns 0 on success or the error reported by vflush().
+ */
+static int
+dirfs_unmount(struct mount *mp, int mntflags)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+       int error;
+
+       debug_called();
+       dmp = VFS_TO_DIRFS(mp);
+
+       error = vflush(mp, 0, 0);
+       if (error)
+               return error;
+
+       /*
+        * Clean up dm_fdlist.  There should be no vnodes left so the
+        * only ref should be from the fdlist.
+        *
+        * NOTE(review): this relies on dirfs_node_setpassive(.., 0)
+        * removing the node from dm_fdlist; otherwise the loop would
+        * never terminate.
+        */
+       while ((dnp = TAILQ_FIRST(&dmp->dm_fdlist)) != NULL) {
+               dirfs_node_setpassive(dmp, dnp, 0);
+       }
+
+       /*
+        * Cleanup root node.  dm_root is only set by dirfs_root(), so it
+        * may still be NULL if that was never called successfully.
+        */
+       dnp = dmp->dm_root;
+       if (dnp != NULL) {
+               dirfs_close_helper(dnp);
+               debug_node2(dnp);
+               dirfs_node_drop(dmp, dnp); /* last ref should free structure */
+               dmp->dm_root = NULL;
+       }
+
+       kfree(dmp, M_DIRFS);
+       mp->mnt_data = (qaddr_t) 0;
+
+       dbg(5, "dirfs umounted successfully\n");
+
+       return 0;
+}
+
+/*
+ * Return the root vnode of a dirfs mount in *vpp.
+ *
+ * On the first call the root dirfs node is allocated, stat'ed, given an
+ * extra reference and a host descriptor opened with O_DIRECTORY; later
+ * calls reuse dmp->dm_root.
+ *
+ * Returns 0 on success, or the error from dirfs_node_stat() / open(2) when
+ * the root node cannot be set up.
+ */
+static int
+dirfs_root(struct mount *mp, struct vnode **vpp)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+       int fd;
+       int error;
+
+       debug_called();
+
+       /* Also logged by KTR_LOG below when the root already exists */
+       error = 0;
+       dmp = VFS_TO_DIRFS(mp);
+       KKASSERT(dmp != NULL);
+
+       if (dmp->dm_root == NULL) {
+               /*
+                * dm_root holds the root dirfs node. Allocate a new one since
+                * there is none. Also attempt to stat it, in order to set
+                * data for VOP_ACCESS()
+                */
+               dnp = dirfs_node_alloc(mp);
+               error = dirfs_node_stat(DIRFS_NOFD, dmp->dm_path, dnp);
+               if (error != 0) {
+                       dirfs_node_free(dmp, dnp);
+                       return error;
+               }
+               dirfs_node_ref(dnp);    /* leave inact for life of mount */
+
+               /* Root inode's parent is NULL, used for verification */
+               dnp->dn_parent = NULL;
+               dmp->dm_root = dnp;
+               dirfs_node_setflags(dnp, DIRFS_ROOT);
+
+               /*
+                * Maintain an open descriptor on the root dnp.  The
+                * normal open/close/cache does not apply for the root
+                * so the descriptor is ALWAYS available.
+                */
+               fd = open(dmp->dm_path, O_DIRECTORY);
+               if (fd == -1) {
+                       /* Save errno before anything can clobber it */
+                       error = errno;
+                       dbg(5, "failed to open ROOT node\n");
+                       dirfs_free_vp(dmp, dnp);
+                       dirfs_node_free(dmp, dnp);
+                       /* Do not leave dm_root pointing at the freed node */
+                       dmp->dm_root = NULL;
+                       return error;
+               }
+               dnp->dn_fd = fd;
+               dnp->dn_type = VDIR;
+       } else {
+               dnp = dmp->dm_root;
+       }
+
+       /*
+        * Acquire the root vnode (dn_type already set above).  This
+        * call will handle any races and return a locked vnode.
+        */
+       dirfs_alloc_vp(mp, vpp, LK_CANRECURSE, dnp);
+       KTR_LOG(dirfs_root, dnp, *vpp, dmp->dm_path, dnp->dn_fd, error);
+
+       return 0;
+}
+
+/*
+ * File handle to vnode translation.  Not implemented: every argument is
+ * ignored and EOPNOTSUPP is always returned.
+ */
+static int
+dirfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
+{
+       debug_called();
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * Fill *sbp with filesystem statistics for a dirfs mount.
+ *
+ * The numbers come from statfs(2) on the host directory (dm_path).  The
+ * "mounted from" name is rewritten as "dirfs@<host path>" and the fs type
+ * name is taken from the mount's vfsconf entry.  cred is not used.
+ *
+ * Returns 0 on success or the errno left by statfs(2).
+ */
+static int
+dirfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
+{
+       dirfs_mount_t dmp = VFS_TO_DIRFS(mp);
+       struct statfs st;
+
+       debug_called();
+
+       if (statfs(dmp->dm_path, &st) == -1)
+               return errno;
+
+       ksnprintf(st.f_mntfromname, MNAMELEN - 1, "dirfs@%s", dmp->dm_path);
+       bcopy(&st, sbp, sizeof(st));
+       strlcpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
+       /* Cast so the conversion matches whatever integer type f_iosize is */
+       dbg(5, "iosize = %ld\n", (long)sbp->f_iosize);
+
+       return 0;
+}
+
+/*
+ * Vnode to file handle translation.  Not implemented: the node behind vp
+ * is only passed to debug_node2() and EOPNOTSUPP is always returned; fhp is
+ * never written.
+ */
+static int
+dirfs_vptofh(struct vnode *vp, struct fid *fhp)
+{
+       dirfs_node_t dnp;
+
+       dnp = VP_TO_NODE(vp);
+       debug_node2(dnp);
+       debug_called();
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * Export check.  Not implemented: every argument is ignored and EOPNOTSUPP
+ * is always returned.
+ */
+static int
+dirfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
+              struct ucred **credanonp)
+{
+       debug_called();
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * VFS operations vector for dirfs.
+ *
+ * mount, unmount, root and statfs are implemented in this file.  fhtovp,
+ * vptofh and checkexp are stubs returning EOPNOTSUPP.  vget and sync use
+ * vfs_stdvget / vfs_stdsync, which are defined outside this file
+ * (presumably the stock VFS implementations -- confirm).
+ */
+static struct vfsops dirfs_vfsops = {
+       .vfs_mount =                    dirfs_mount,
+       .vfs_unmount =                  dirfs_unmount,
+       .vfs_root =                     dirfs_root,
+       .vfs_vget =                     vfs_stdvget,
+       .vfs_statfs =                   dirfs_statfs,
+       .vfs_fhtovp =                   dirfs_fhtovp,
+       .vfs_vptofh =                   dirfs_vptofh,
+       .vfs_sync =                     vfs_stdsync,
+       .vfs_checkexp =                 dirfs_checkexp
+};
+
+VFS_SET(dirfs_vfsops, dirfs, 0);
+MODULE_VERSION(dirfs, 1);
diff --git a/sys/vfs/dirfs/dirfs_vnops.c b/sys/vfs/dirfs/dirfs_vnops.c
new file mode 100644 (file)
index 0000000..f060f41
--- /dev/null
@@ -0,0 +1,1500 @@
+/*
+ * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Antonio Huete Jimenez <tuxillo@quantumachine.net>
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * See below a small table with the vnode operation and syscall correspondence
+ * where it applies:
+ *
+ * VNODE OP            SCALL   SCALL_AT  FD    PATH    COMMENTS
+ * dirfs_ncreate       Y       Y         Y     Y       open(2), openat(2)
+ * dirfs_nresolve      -       -         -     Y       no syscall needed
+ * dirfs_nlookupdotdot -       -         -     -       -
+ * dirfs_nmknod                Y       Y         Y     Y       mknod(2), mknodat(2)
+ * dirfs_open          Y       Y         Y     Y       open(2), openat(2)
+ * dirfs_close         Y       Y         Y     Y       close(2)
+ * dirfs_access                -       -         -     -       data from stat(2)
+ * dirfs_getattr       Y       Y         Y     Y       lstat(2), fstat(2), fstatat(2)
+ * dirfs_setattr       -       -         -     -       -
+ * dirfs_read          Y       -         Y     -       read(2). relies on bufcache
+ * dirfs_write         Y       -         Y     -       write(2). relies on bufcache
+ * dirfs_fsync         Y       -         Y     -       fsync(2)
+ * dirfs_mountctl      -       -         -     -       -
+ * dirfs_nremove       Y       -         -     Y       unlink(2)
+ * dirfs_nlink         -       -         -     -       -
+ * dirfs_nrename       Y       Y         Y     Y       rename(2), renameat(2)
+ * dirfs_nmkdir                Y       Y         Y     Y       mkdir(2), mkdirat(2)
+ * dirfs_nrmdir                Y       -         -     Y       rmdir(2)
+ * dirfs_nsymlink      Y       Y         Y     Y       symlink(2), symlinkat(2)
+ * dirfs_readdir       Y       -         Y     -       getdirentries(2)
+ * dirfs_readlink      Y       Y         Y     Y       readlink(2), readlinkat(2)
+ * dirfs_inactive      -       -         -     -       -
+ * dirfs_reclaim       -       -         -     -       -
+ * dirfs_print         -       -         -     -       -
+ * dirfs_pathconf      -       -         -     -       -
+ * dirfs_bmap          -       -         -     -       -
+ * dirfs_strategy      Y       -         Y     -       pwrite(2), pread(2)
+ * dirfs_advlock       -       -         -     -       -
+ * dirfs_kqfilter      -       -         -     -       -
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/vfsops.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namecache.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/resource.h>
+#include <sys/buf2.h>
+#include <sys/kern_syscall.h>
+#include <sys/ktr.h>
+
+#include "dirfs.h"
+
+/*
+ * Kernel tracing facilities
+ */
+KTR_INFO_MASTER_EXTERN(dirfs);
+
+/*
+ * One KTR_INFO declaration per traced vnode operation.  Each entry gives a
+ * format string followed by the typed parameter list the matching
+ * KTR_LOG() call is expected to supply.
+ */
+KTR_INFO(KTR_DIRFS, dirfs, unsupported, 0,
+    "DIRFS(func=%s)",
+    const char *func);
+
+KTR_INFO(KTR_DIRFS, dirfs, nresolve, 0,
+    "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)",
+    dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, ncreate, 1,
+    "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)",
+    dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, open, 2,
+    "DIRFS(dnp=%p newfd?=%s)",
+    dirfs_node_t dnp, char *isnew);
+
+KTR_INFO(KTR_DIRFS, dirfs, close, 3,
+    "DIRFS(dnp=%p fd=%d vfsync error=%d)",
+    dirfs_node_t dnp, int fd, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, readdir, 4,
+    "DIRFS(dnp=%p fd=%d startoff=%jd uio_offset=%jd)",
+    dirfs_node_t dnp, int fd, off_t startoff, off_t uoff);
+
+KTR_INFO(KTR_DIRFS, dirfs, access, 5,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, getattr, 6,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, setattr, 7,
+    "DIRFS(dnp=%p action=%s error=%d)",
+    dirfs_node_t dnp, const char *action, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, fsync, 8,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+/* NOTE(review): size is declared size_t but formatted with %jd -- confirm */
+KTR_INFO(KTR_DIRFS, dirfs, read, 9,
+    "DIRFS(dnp=%p size=%jd error=%d)",
+    dirfs_node_t dnp, size_t size, int error);
+
+/*
+ * NOTE(review): the format labels read size, boff, uio_resid while the
+ * parameters are declared as boff, resid, size.  One of the two orders is
+ * presumably wrong; check against the KTR_LOG(dirfs_write, ...) call.
+ */
+KTR_INFO(KTR_DIRFS, dirfs, write, 10,
+    "DIRFS(dnp=%p size=%jd boff=%jd uio_resid=%jd error=%d)",
+    dirfs_node_t dnp, off_t boff, size_t resid, size_t size, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, strategy, 11,
+    "DIRFS(dnp=%p dnp_size=%jd iosize=%jd b_cmd=%d b_error=%d "
+    "b_resid=%d bio_off=%jd error=%d)",
+    dirfs_node_t dnp, size_t size, size_t iosize, int cmd, int berror,
+    int bresid, off_t biooff, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, nremove, 12,
+    "DIRFS(dnp=%p pdnp=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, int error);
+
+/*
+ * NOTE(review): the format prints pdnp before dnp while the parameters are
+ * declared dnp first, and nc_name uses %p where the other events print
+ * names with %s.  Verify against the call site.
+ */
+KTR_INFO(KTR_DIRFS, dirfs, nmkdir, 13,
+    "DIRFS(pdnp=%p dnp=%p nc_name=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, char *n, int error);
+
+/*
+ * NOTE(review): same pdnp/dnp ordering question as nmkdir; this entry also
+ * reuses the number 13 given to nmkdir -- confirm that is intended.
+ */
+KTR_INFO(KTR_DIRFS, dirfs, nrmdir, 13,
+    "DIRFS(pdnp=%p dnp=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, nsymlink, 14,
+    "DIRFS(dnp=%p target=%s symlink=%s error=%d)",
+    dirfs_node_t dnp, char *tgt, char *lnk, int error);
+
+/* Needed prototypes */
+int dirfs_access(struct vop_access_args *);
+int dirfs_getattr(struct vop_getattr_args *);
+int dirfs_setattr(struct vop_setattr_args *);
+int dirfs_reclaim(struct vop_reclaim_args *);
+
+/*
+ * Resolve a namecache entry inside the directory a_dvp.
+ *
+ * The passive fd list of the mount is searched first for a node with the
+ * same parent and name.  On a hit a vnode is obtained for that node; on a
+ * miss dirfs_alloc_file() is asked to produce both node and vnode.  When a
+ * vnode came back, the namecache entry is resolved to it (or to NULL when
+ * the error is ENOENT).
+ *
+ * Returns 0 or the error produced by dirfs_alloc_file().
+ */
+static int
+dirfs_nresolve(struct vop_nresolve_args *ap)
+{
+       dirfs_node_t pdnp, dnp, cand, cand_next;
+       dirfs_mount_t dmp;
+       struct namecache *ncp;
+       struct nchandle *nch;
+       struct vnode *dvp;
+       struct vnode *vp;
+       struct mount *mp;
+       int error;
+
+       debug_called();
+
+       nch = ap->a_nch;
+       dvp = ap->a_dvp;
+       ncp = nch->ncp;
+       mp = nch->mount;
+       dmp = VFS_TO_DIRFS(mp);
+       pdnp = VP_TO_NODE(dvp);
+       dnp = cand = cand_next = NULL;
+       vp = NULL;
+       error = 0;
+
+       /* Look for a cached (passive) node matching parent + name */
+       dirfs_node_lock(pdnp);
+       TAILQ_FOREACH_MUTABLE(cand, &dmp->dm_fdlist, dn_fdentry, cand_next) {
+               if (cand->dn_parent != pdnp)
+                       continue;
+               if (strcmp(cand->dn_name, ncp->nc_name) != 0)
+                       continue;
+               dnp = cand;
+               dirfs_node_ref(dnp);
+               passive_fd_list_hits++;
+               break;
+       }
+       dirfs_node_unlock(pdnp);
+
+       if (dnp == NULL) {
+               /* Miss: build node and vnode from scratch */
+               passive_fd_list_miss++;
+               error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, &vp, NULL, 0);
+       } else {
+               /* Hit: the ref taken above is only needed across this call */
+               dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
+               dirfs_node_drop(dmp, dnp);
+       }
+
+       if (vp != NULL) {
+               if (error == ENOENT) {
+                       cache_setvp(nch, NULL);
+               } else {
+                       vn_unlock(vp);
+                       cache_setvp(nch, vp);
+                       vrele(vp);
+               }
+       }
+
+       KTR_LOG(dirfs_nresolve, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
+
+       return error;
+}
+
+/*
+ * ".." lookup.  Not implemented: the call is recorded through the
+ * dirfs_unsupported KTR event and EOPNOTSUPP is always returned.
+ */
+static int
+dirfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
+{
+       debug_called();
+
+       KTR_LOG(dirfs_unsupported, __func__);
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * Create a new file named by the namecache entry a_nch inside the
+ * directory a_dvp.
+ *
+ * The parent node must be writable according to dirfs_node_getperms();
+ * otherwise EPERM is returned and nothing is created.  On success the new
+ * vnode is stored in *a_vpp and the namecache entry is re-resolved to it.
+ *
+ * Returns 0, EPERM, or the error from dirfs_alloc_file().
+ */
+static int
+dirfs_ncreate(struct vop_ncreate_args *ap)
+{
+       dirfs_node_t pdnp;
+       dirfs_node_t dnp;
+       dirfs_mount_t dmp;
+       struct namecache *ncp;
+       struct vnode *dvp;
+       struct vnode **vpp;
+       struct vattr *vap;
+       int canwrite = 0;
+       int error;
+
+       debug_called();
+
+       error = 0;
+       dnp = NULL;
+       dvp = ap->a_dvp;
+       pdnp = VP_TO_NODE(dvp);
+       dmp = VFS_TO_DIRFS(dvp->v_mount);
+       vap = ap->a_vap;
+       ncp = ap->a_nch->ncp;
+       vpp = ap->a_vpp;
+
+       dirfs_mount_gettoken(dmp);
+
+       /*
+        * Honour the permission check: bail out before touching the host
+        * instead of letting the result be overwritten by the create call.
+        */
+       dirfs_node_getperms(pdnp, NULL, &canwrite, NULL);
+       if (!canwrite) {
+               error = EPERM;
+               goto out;
+       }
+
+       error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp, vap,
+           (O_CREAT | O_RDWR));
+
+       if (error == 0) {
+               cache_setunresolved(ap->a_nch);
+               cache_setvp(ap->a_nch, *vpp);
+       }
+
+out:
+       dirfs_mount_reltoken(dmp);
+
+       KTR_LOG(dirfs_ncreate, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
+
+       return error;
+}
+
+/*
+ * Device node creation.  Not implemented: the arguments are ignored and
+ * EOPNOTSUPP is always returned.
+ */
+static int
+dirfs_nmknod(struct vop_nmknod_args *v)
+{
+       debug_called();
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * Open the vnode a_vp.
+ *
+ * A non-root node that has no host descriptor yet gets one through
+ * dirfs_open_helper(); a failure there is returned as is.  Otherwise the
+ * result of vop_stdopen() is returned.
+ */
+static int
+dirfs_open(struct vop_open_args *ap)
+{
+       struct vnode *vp;
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+       int prev_fd;
+       int error;
+
+       debug_called();
+
+       vp = ap->a_vp;
+       dnp = VP_TO_NODE(vp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+       prev_fd = dnp->dn_fd;
+
+       /*
+        * Root inode has been allocated and opened in VFS_ROOT() so
+        * no reason to attempt to open it again.
+        */
+       if (dnp != dmp->dm_root && prev_fd == DIRFS_NOFD) {
+               error = dirfs_open_helper(dmp, dnp, DIRFS_NOFD, NULL);
+               if (error != 0)
+                       return error;
+       }
+
+       /* "true" when the descriptor changed during this call */
+       KTR_LOG(dirfs_open, dnp, (prev_fd != dnp->dn_fd) ? "true" : "false");
+
+       return vop_stdopen(ap);
+}
+
+/*
+ * Close the vnode a_vp.
+ *
+ * Regular files are passed through vfsync() first; a failure there is
+ * only logged.  The return value is always that of vop_stdclose().
+ */
+static int
+dirfs_close(struct vop_close_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       dirfs_node_t dnp;
+       int error;
+
+       debug_called();
+
+       dnp = VP_TO_NODE(vp);
+
+       error = (vp->v_type == VREG) ? vfsync(vp, 0, 1, NULL, NULL) : 0;
+       if (error != 0)
+               dbg(5, "vfsync error=%d\n", error);
+
+       KTR_LOG(dirfs_close, dnp, dnp->dn_fd, error);
+
+       return vop_stdclose(ap);
+}
+
+/*
+ * Check access to the vnode a_vp for the mode a_mode.
+ *
+ * Returns EROFS for a write request on a directory, symlink or regular
+ * file of a read-only mount, EINVAL for a vnode type not listed below,
+ * and otherwise the result of vop_helper_access() using the uid, gid and
+ * mode stored in the dirfs node.
+ */
+int
+dirfs_access(struct vop_access_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       dirfs_node_t dnp;
+       int error;
+
+       debug_called();
+
+       dnp = VP_TO_NODE(vp);
+       error = 0;
+
+       switch (vp->v_type) {
+       case VDIR:
+       case VLNK:
+       case VREG:
+               /* Only these types are refused writes on a r/o mount */
+               if ((ap->a_mode & VWRITE) &&
+                   (vp->v_mount->mnt_flag & MNT_RDONLY))
+                       error = EROFS;
+               break;
+       case VBLK:
+       case VCHR:
+       case VSOCK:
+       case VFIFO:
+               break;
+       default:
+               error = EINVAL;
+               break;
+       }
+
+       if (error == 0) {
+               error = vop_helper_access(ap, dnp->dn_uid,
+                   dnp->dn_gid, dnp->dn_mode, 0);
+       }
+
+       KTR_LOG(dirfs_access, dnp, error);
+
+       return error;
+}
+
+/*
+ * Retrieve the attributes of the vnode a_vp into a_vap.
+ *
+ * The dirfs node is refreshed through dirfs_node_stat() first: non-root
+ * nodes are stat'ed relative to the descriptor found by dirfs_findfd(),
+ * the root node by its host path.  The refreshed fields are then copied
+ * into the vattr under the node lock.
+ *
+ * Returns 0 on success or the error from dirfs_node_stat(), in which case
+ * a_vap is left untouched.
+ */
+int
+dirfs_getattr(struct vop_getattr_args *ap)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+       dirfs_node_t pathnp;
+       struct vnode *vp;
+       struct vattr *vap;
+       char *tmp;
+       char *pathfree;
+       int error;
+
+       debug_called();
+
+       vp = ap->a_vp;
+       vap = ap->a_vap;
+       dnp = VP_TO_NODE(vp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       KKASSERT(dnp);  /* This must not happen */
+
+       if (!dirfs_node_isroot(dnp)) {
+               pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+
+               KKASSERT(pathnp->dn_fd != DIRFS_NOFD);
+
+               error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
+               dirfs_dropfd(dmp, pathnp, pathfree);
+       } else {
+               error = dirfs_node_stat(DIRFS_NOFD, dmp->dm_path, dnp);
+       }
+
+       if (error == 0) {
+               dirfs_node_lock(dnp);
+               vap->va_nlink = dnp->dn_links;
+               vap->va_type = dnp->dn_type;
+               vap->va_mode = dnp->dn_mode;
+               vap->va_uid = dnp->dn_uid;
+               vap->va_gid = dnp->dn_gid;
+               vap->va_fileid = dnp->dn_ino;
+               vap->va_size = dnp->dn_size;
+               vap->va_blocksize = dnp->dn_blocksize;
+               vap->va_atime.tv_sec = dnp->dn_atime;
+               vap->va_atime.tv_nsec = dnp->dn_atimensec;
+               vap->va_mtime.tv_sec = dnp->dn_mtime;
+               vap->va_mtime.tv_nsec = dnp->dn_mtimensec;
+               vap->va_ctime.tv_sec = dnp->dn_ctime;
+               vap->va_ctime.tv_nsec = dnp->dn_ctimensec;
+               vap->va_bytes = dnp->dn_size;
+               vap->va_gen = dnp->dn_gen;
+               vap->va_flags = dnp->dn_flags;
+               vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+               dirfs_node_unlock(dnp);
+       }
+
+       KTR_LOG(dirfs_getattr, dnp, error);
+
+       /* Report a failed stat instead of success with an unfilled vattr */
+       return error;
+}
+
+/*
+ * Change attributes of the vnode a_vp as requested in a_vap.
+ *
+ * Exactly one kind of change is applied per call, picked in this order:
+ * flags, size, owner/group, mode, times.  A request touching an
+ * unsettable attribute fails with EINVAL; any change on a read-only mount
+ * fails with EROFS.  The whole operation runs with the dirfs mount token
+ * held.
+ *
+ * Returns 0 or an errno value.
+ */
+int
+dirfs_setattr(struct vop_setattr_args *ap)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+       struct vnode *vp;
+       struct vattr *vap;
+       struct ucred *cred;
+       int error;
+#ifdef KTR
+       const char *msg[6] = {
+               "invalid",
+               "chflags",
+               "chsize",
+               "chown",
+               "chmod",
+               "chtimes"
+       };
+#endif
+       int msgno;
+
+       debug_called();
+
+       error = msgno = 0;
+       vp = ap->a_vp;
+       vap = ap->a_vap;
+       cred = ap->a_cred;
+       dnp = VP_TO_NODE(vp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       dirfs_mount_gettoken(dmp);
+
+       if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+           (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+           (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) ||
+           ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+               /* Unsettable attribute requested */
+               msgno = 0;
+               error = EINVAL;
+       } else if (vap->va_flags != VNOVAL) {
+               /* File flags */
+               if (vp->v_mount->mnt_flag & MNT_RDONLY)
+                       error = EROFS;
+               else
+                       error = dirfs_node_chflags(dnp, vap->va_flags, cred);
+               msgno = 1;
+       } else if (vap->va_size != VNOVAL) {
+               /* Extend or truncate */
+               if (vp->v_mount->mnt_flag & MNT_RDONLY)
+                       error = EROFS;
+               else
+                       error = dirfs_node_chsize(dnp, vap->va_size);
+               dbg(2, "dnp size=%jd vap size=%jd\n", dnp->dn_size, vap->va_size);
+               msgno = 2;
+       } else if (vap->va_uid != (uid_t)VNOVAL ||
+           vap->va_gid != (gid_t)VNOVAL) {
+               /* Owner and/or group */
+               if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+                       error = EROFS;
+               } else {
+                       mode_t new_mode = dnp->dn_mode;
+                       uid_t new_uid = dnp->dn_uid;
+                       gid_t new_gid = dnp->dn_gid;
+
+                       error = vop_helper_chown(ap->a_vp, vap->va_uid,
+                                                vap->va_gid, ap->a_cred,
+                                                &new_uid, &new_gid, &new_mode);
+                       /* Only go to the host when something changed */
+                       if (error == 0 &&
+                           (new_mode != dnp->dn_mode ||
+                            new_uid != dnp->dn_uid ||
+                            new_gid != dnp->dn_gid)) {
+                               error = dirfs_node_chown(dmp, dnp, new_uid,
+                                                        new_gid, new_mode);
+                       }
+               }
+               msgno = 3;
+       } else if (vap->va_mode != (mode_t)VNOVAL) {
+               /* File mode */
+               if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+                       error = EROFS;
+               } else {
+                       mode_t new_mode = dnp->dn_mode;
+                       uid_t node_uid = dnp->dn_uid;
+                       gid_t node_gid = dnp->dn_gid;
+
+                       error = vop_helper_chmod(ap->a_vp, vap->va_mode,
+                                                ap->a_cred,
+                                                node_uid, node_gid, &new_mode);
+                       if (error == 0 && new_mode != dnp->dn_mode) {
+                               error = dirfs_node_chmod(dmp, dnp, new_mode);
+                       }
+               }
+               msgno = 4;
+       } else if ((vap->va_atime.tv_sec != VNOVAL &&
+           vap->va_atime.tv_nsec != VNOVAL) ||
+           (vap->va_mtime.tv_sec != VNOVAL &&
+           vap->va_mtime.tv_nsec != VNOVAL)) {
+               /* File times */
+               if (vp->v_mount->mnt_flag & MNT_RDONLY)
+                       error = EROFS;
+               else
+                       error = dirfs_node_chtimes(dnp);
+               msgno = 5;
+       }
+
+       dirfs_mount_reltoken(dmp);
+
+       KTR_LOG(dirfs_setattr, dnp, msg[msgno], error);
+
+       return error;
+}
+
+/*
+ * Flush the vnode a_vp to the host.
+ *
+ * Dirty buffers are first pushed with vfsync(); then, when the node owns
+ * a host descriptor, fsync(2) is issued on it exactly once.
+ *
+ * Returns 0 on success, otherwise the vfsync() error or, if vfsync()
+ * succeeded, the errno left by fsync(2).
+ */
+static int
+dirfs_fsync(struct vop_fsync_args *ap)
+{
+       dirfs_node_t dnp = VP_TO_NODE(ap->a_vp);
+       int error;
+
+       debug_called();
+
+       error = vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
+
+       if (dnp->dn_fd != DIRFS_NOFD && fsync(dnp->dn_fd) == -1) {
+               /* Keep the first failure; record errno, not the -1 */
+               if (error == 0)
+                       error = errno;
+       }
+
+       KTR_LOG(dirfs_fsync, dnp, error);
+
+       return error;
+}
+
+/*
+ * Read from the regular file behind a_vp into a_uio.
+ *
+ * Data is served from the buffer cache one BSIZE block at a time until the
+ * request is satisfied or the node's recorded size is reached.
+ *
+ * Returns 0 on success (including a zero-length request), EINVAL for a
+ * negative offset or a vnode that is not VREG, or the error from bread()
+ * or uiomovebp().
+ */
+static int
+dirfs_read(struct vop_read_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct buf *bp;
+       dirfs_node_t dnp;
+       off_t base_offset;
+       size_t offset;
+       size_t len;
+       int error = 0;
+
+       debug_called();
+
+       if (uio->uio_resid == 0) {
+               dbg(5, "zero len uio->uio_resid\n");
+               return error;
+       }
+
+       dnp = VP_TO_NODE(vp);
+
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       if (vp->v_type != VREG)
+               return (EINVAL);
+
+       while (uio->uio_resid > 0 && uio->uio_offset < dnp->dn_size) {
+               /* Split the file offset into block base + offset in block */
+               offset = (size_t)uio->uio_offset & BMASK;
+               base_offset = (off_t)uio->uio_offset - offset;
+
+               /*
+                * Use buffer cache I/O (via dirfs_strategy).  bread() is
+                * only needed when the block is not already cached, and
+                * it is issued with the mount token held.
+                */
+               bp = getcacheblk(vp, base_offset, BSIZE, 0);
+               if (bp == NULL) {
+                       lwkt_gettoken(&vp->v_mount->mnt_token);
+                       error = bread(vp, base_offset, BSIZE, &bp);
+                       if (error)
+                               brelse(bp);
+                       lwkt_reltoken(&vp->v_mount->mnt_token);
+                       if (error) {
+                               dbg(5, "dirfs_read bread error %d\n", error);
+                               break;
+                       }
+               }
+
+               /*
+                * Copy no more than what is left in this block, in the
+                * request, and in the file.
+                */
+               len = BSIZE - offset;
+               if (len > uio->uio_resid)
+                       len = uio->uio_resid;
+               if (len > dnp->dn_size - uio->uio_offset)
+                       len = (size_t)(dnp->dn_size - uio->uio_offset);
+
+               error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
+               bqrelse(bp);
+               if (error) {
+                       dbg(5, "dirfs_read uiomove error %d\n", error);
+                       break;
+               }
+       }
+
+       KTR_LOG(dirfs_read, dnp, dnp->dn_size, error);
+
+       return(error);
+}
+
+/*
+ * VOP_WRITE: copy caller data into a regular file through the buffer cache.
+ *
+ * A zero-length request returns 0 immediately and a vnode that is not VREG
+ * returns EINVAL.  With IO_APPEND the write is repositioned to dn_size
+ * before anything else, so the RLIMIT_FSIZE check below is made against the
+ * offset that will really be written.  Exceeding the limit posts SIGXFSZ
+ * and returns EFBIG.
+ *
+ * When the write grows the file, dn_size, the host file (ftruncate) and the
+ * buffer cache (nvextendbuf) are extended up front.  Data is then moved one
+ * BSIZE block at a time; each block is bread() first so that a partial
+ * block update keeps the bytes that are not overwritten.  A bread() or
+ * uiomovebp() failure releases the buffer, stops the loop and is returned.
+ * Buffers are written synchronously with IO_SYNC, delayed otherwise.
+ */
+static int
+dirfs_write (struct vop_write_args *ap)
+{
+       dirfs_node_t dnp;
+       struct buf *bp;
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct thread *td = uio->uio_td;
+       int error;
+       off_t osize;
+       off_t nsize;
+       off_t base_offset;
+       size_t offset;
+       size_t len;
+       struct rlimit limit;
+
+       debug_called();
+
+       error = 0;
+       if (uio->uio_resid == 0) {
+               dbg(5, "zero-length uio->uio_resid\n");
+               return error;
+       }
+
+       dnp = VP_TO_NODE(vp);
+
+       if (vp->v_type != VREG)
+               return (EINVAL);
+
+       /*
+        * Appending moves the write to the end of the file.  Do it before
+        * the size limit check so the check sees the final offset.
+        */
+       if (ap->a_ioflag & IO_APPEND)
+               uio->uio_offset = dnp->dn_size;
+
+       if (td != NULL) {
+               error = kern_getrlimit(RLIMIT_FSIZE, &limit);
+               if (error != 0) {
+                       dbg(5, "rlimit failure\n");
+                       return error;
+               }
+               if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
+                       dbg(5, "file too big\n");
+                       ksignal(td->td_proc, SIGXFSZ);
+                       return (EFBIG);
+               }
+       }
+
+       /*
+        * buffer cache operations may be deferred, make sure
+        * the file is correctly sized right now.
+        */
+       osize = dnp->dn_size;
+       nsize = uio->uio_offset + uio->uio_resid;
+       if (nsize > osize && uio->uio_resid) {
+               KKASSERT(dnp->dn_fd >= 0);
+               dnp->dn_size = nsize;
+               ftruncate(dnp->dn_fd, nsize);
+               nvextendbuf(vp, osize, nsize,
+                           BSIZE, BSIZE, -1, -1, 0);
+       }
+
+       while (uio->uio_resid > 0) {
+               /*
+                * Use buffer cache I/O (via dirfs_strategy)
+                */
+               offset = (size_t)uio->uio_offset & BMASK;
+               base_offset = (off_t)uio->uio_offset - offset;
+               len = BSIZE - offset;
+
+               if (len > uio->uio_resid)
+                       len = uio->uio_resid;
+
+               /*
+                * Bring the block in first.  Its error must be checked
+                * on its own, otherwise it is lost when uiomovebp()
+                * assigns to the same variable.
+                */
+               error = bread(vp, base_offset, BSIZE, &bp);
+               if (error) {
+                       brelse(bp);
+                       dbg(2, "WRITE bread failed\n");
+                       break;
+               }
+
+               error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
+               if (error) {
+                       brelse(bp);
+                       dbg(2, "WRITE uiomove failed\n");
+                       break;
+               }
+
+               if (ap->a_ioflag & IO_SYNC)
+                       bwrite(bp);
+               else
+                       bdwrite(bp);
+       }
+
+       KTR_LOG(dirfs_write, dnp, base_offset, uio->uio_resid,
+           dnp->dn_size, error);
+
+       return error;
+}
+
+/*
+ * VOP_ADVLOCK: forward the advisory lock request to lf_advlock(), using
+ * the node's dn_advlock lock state and its current dn_size.
+ */
+static int
+dirfs_advlock (struct vop_advlock_args *ap)
+{
+       dirfs_node_t dnp = VP_TO_NODE(ap->a_vp);
+       int error;
+
+       debug_called();
+
+       error = lf_advlock(ap, &dnp->dn_advlock, dnp->dn_size);
+
+       return (error);
+}
+
+/*
+ * VOP_STRATEGY: carry out buffer I/O with pread()/pwrite() on the node's
+ * descriptor.
+ *
+ * The bio is always completed with biodone() and the function always
+ * returns 0; failures are reported through b_error/B_ERROR on the buffer.
+ *
+ *  - A vnode that is not VREG fails the buffer with EINVAL.
+ *  - A node without a descriptor is a panic.
+ *  - The transfer is clipped so it does not go past dn_size.
+ *  - A transfer interrupted by a signal (EINTR) is retried instead of
+ *    being completed as if it had succeeded.
+ *  - A short transfer has the rest of the buffer zeroed.
+ *  - A buffer command other than read/write fails the buffer with EINVAL
+ *    without running the byte-count handling on the error code.
+ *  - After a successful transfer the node is re-stat'ed by path.
+ */
+static int
+dirfs_strategy(struct vop_strategy_args *ap)
+{
+       dirfs_node_t dnp;
+       dirfs_mount_t dmp;
+       struct bio *bio = ap->a_bio;
+       struct buf *bp = bio->bio_buf;
+       struct vnode *vp = ap->a_vp;
+       int error;
+       size_t iosize;
+       char *tmp;
+       char *pathfree;
+
+       debug_called();
+
+       dnp = VP_TO_NODE(vp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       error = 0;
+
+       if (vp->v_type != VREG)  {
+               dbg(5, "not VREG\n");
+               bp->b_resid = bp->b_bcount;
+               bp->b_flags |= B_ERROR | B_INVAL;
+               bp->b_error = EINVAL;
+               biodone(bio);
+               return(0);
+       }
+
+       if (dnp->dn_fd == DIRFS_NOFD) {
+               print_backtrace(-1);
+               panic("Meh, no fd to write to. dnp=%p\n", dnp);
+       }
+
+       /* Never transfer past the size recorded for the node. */
+       if (bio->bio_offset + bp->b_bcount > dnp->dn_size)
+               iosize = dnp->dn_size - bio->bio_offset;
+       else
+               iosize = bp->b_bcount;
+       KKASSERT((ssize_t)iosize >= 0);
+
+       switch (bp->b_cmd) {
+       case BUF_CMD_WRITE:
+               do {
+                       error = pwrite(dnp->dn_fd, bp->b_data, iosize,
+                           bio->bio_offset);
+               } while (error < 0 && errno == EINTR);
+               break;
+       case BUF_CMD_READ:
+               do {
+                       error = pread(dnp->dn_fd, bp->b_data, iosize,
+                           bio->bio_offset);
+               } while (error < 0 && errno == EINTR);
+               break;
+       default:
+               /*
+                * Not a transfer: fail the buffer and skip the code below,
+                * which interprets 'error' as a byte count.
+                */
+               bp->b_error = error = EINVAL;
+               bp->b_resid = bp->b_bcount;
+               bp->b_flags |= B_ERROR;
+               goto done;
+       }
+
+       /* Short transfer: clear whatever part of the buffer was not moved. */
+       if (error >= 0 && error < bp->b_bcount)
+               bzero(bp->b_data + error, bp->b_bcount - error);
+
+       if (error < 0) {
+               /* Latch errno before dbg() has a chance to change it. */
+               bp->b_error = errno;
+               dbg(5, "error=%d dnp=%p dnp->dn_fd=%d "
+                   "bio->bio_offset=%ld bcount=%d resid=%d iosize=%zd\n",
+                   bp->b_error, dnp, dnp->dn_fd, bio->bio_offset,
+                   bp->b_bcount, bp->b_resid, iosize);
+               bp->b_resid = bp->b_bcount;
+               bp->b_flags |= B_ERROR;
+       } else {
+               /* Refresh the node's attributes from the host file. */
+               tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
+               dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+               dirfs_dropfd(dmp, NULL, pathfree);
+       }
+
+done:
+       KTR_LOG(dirfs_strategy, dnp, dnp->dn_size, iosize, bp->b_cmd,
+           bp->b_error, bp->b_resid, bio->bio_offset, error);
+
+       biodone(bio);
+
+       return 0;
+}
+
+/*
+ * VOP_BMAP: logical offsets map to themselves.  Each output pointer is
+ * optional; when present, the device offset is set to the logical offset
+ * and both run lengths (ahead and behind) are set to 0.
+ */
+static int
+dirfs_bmap(struct vop_bmap_args *ap)
+{
+       debug_called();
+
+       if (ap->a_runb != NULL)
+               *ap->a_runb = 0;
+       if (ap->a_runp != NULL)
+               *ap->a_runp = 0;
+       if (ap->a_doffsetp != NULL)
+               *ap->a_doffsetp = ap->a_loffset;
+
+       return (0);
+}
+
+/*
+ * VOP_NREMOVE: unlink a non-directory entry.
+ *
+ * The vnode behind the namecache handle is obtained with cache_vget(); if
+ * that fails the error is returned and nothing is touched.  A directory is
+ * refused with EISDIR.  Otherwise the entry is removed on the host with
+ * unlinkat(), relative to the node returned by dirfs_findfd().  On success
+ * the namecache entry is unlinked, the node is taken through
+ * dirfs_node_setpassive() and its reference on the parent is dropped.  A
+ * failed unlinkat() returns errno.
+ *
+ * The whole operation runs with the mount token held.
+ */
+static int
+dirfs_nremove(struct vop_nremove_args *ap)
+{
+       dirfs_node_t dnp, pdnp;
+       dirfs_node_t pathnp;
+       dirfs_mount_t dmp;
+       struct vnode *dvp;
+       struct nchandle *nch;
+       struct mount *mp;
+       struct vnode *vp;
+       int error;
+       char *tmp;
+       char *pathfree;
+
+       debug_called();
+
+       tmp = NULL;
+       vp = NULL;
+       dvp = ap->a_dvp;
+       nch = ap->a_nch;
+
+       mp = dvp->v_mount;
+       dmp = VFS_TO_DIRFS(mp);
+
+       lwkt_gettoken(&mp->mnt_token);
+
+       /* Without a vnode there is nothing to unlock, inspect or release. */
+       error = cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
+       if (error) {
+               lwkt_reltoken(&mp->mnt_token);
+               return error;
+       }
+       vn_unlock(vp);
+
+       pdnp = VP_TO_NODE(dvp);
+       dnp = VP_TO_NODE(vp);
+
+       if (vp->v_type == VDIR) {
+               error = EISDIR;
+       } else {
+               pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+               dirfs_node_lock(pdnp);
+               error = unlinkat(pathnp->dn_fd, tmp, 0);
+               if (error == 0) {
+                       cache_unlink(nch);
+                       dirfs_node_setpassive(dmp, dnp, 0);
+                       if (dnp->dn_parent) {
+                               dirfs_node_drop(dmp, dnp->dn_parent);
+                               dnp->dn_parent = NULL;
+                       }
+               } else {
+                       error = errno;
+               }
+               dirfs_node_unlock(pdnp);
+               dirfs_dropfd(dmp, pathnp, pathfree);
+       }
+       vrele(vp);
+       lwkt_reltoken(&mp->mnt_token);
+
+       KTR_LOG(dirfs_nremove, dnp, pdnp, error);
+
+       return error;
+}
+
+/*
+ * VOP_NLINK is not implemented: the call is traced and rejected with
+ * EOPNOTSUPP.
+ */
+static int
+dirfs_nlink(struct vop_nlink_args *ap)
+{
+       const int error = EOPNOTSUPP;
+
+       debug_called();
+
+       KTR_LOG(dirfs_unsupported, __func__);
+
+       return (error);
+}
+
+/*
+ * VOP_NRENAME: rename an entry on the host and update dirfs and the
+ * namecache to match.
+ *
+ * Returns EXDEV when the source directory, target directory and source
+ * vnode are not all on the same mount, and EROFS on a read-only mount.
+ * The host rename() is done on paths built from the two directory nodes
+ * plus the namecache names; its errno is returned on failure.  On success
+ * the renamed node takes the target name, a vnode that previously held the
+ * target name is marked as unlinked, and the namecache entries are renamed.
+ */
+static int
+dirfs_nrename(struct vop_nrename_args *ap)
+{
+       struct vnode *fdvp = ap->a_fdvp;
+       struct vnode *tdvp = ap->a_tdvp;
+       struct namecache *fncp = ap->a_fnch->ncp;
+       struct namecache *tncp = ap->a_tnch->ncp;
+       struct mount *mp = fdvp->v_mount;
+       dirfs_mount_t dmp = VFS_TO_DIRFS(mp);
+       dirfs_node_t fdnp = VP_TO_NODE(fdvp);
+       dirfs_node_t tdnp = VP_TO_NODE(tdvp);
+       dirfs_node_t srcnode, dstnode;
+       struct vnode *dstvp;
+       char *fpath, *fpathfree;
+       char *tpath, *tpathfree;
+       int error;
+
+       debug_called();
+
+       dbg(5, "fdnp=%p tdnp=%p from=%s to=%s\n", fdnp, tdnp, fncp->nc_name,
+           tncp->nc_name);
+
+       /* Everything involved has to live on one mount. */
+       if (mp != tdvp->v_mount || mp != fncp->nc_vp->v_mount)
+               return(EXDEV);
+       if (mp->mnt_flag & MNT_RDONLY)
+               return (EROFS);
+
+       tpath = dirfs_node_absolute_path_plus(dmp, tdnp,
+                                             tncp->nc_name, &tpathfree);
+       fpath = dirfs_node_absolute_path_plus(dmp, fdnp,
+                                             fncp->nc_name, &fpathfree);
+
+       error = rename(fpath, tpath);
+       if (error < 0)
+               error = errno;
+       if (error != 0)
+               goto out;
+
+       /* The node that was renamed now goes by the target name. */
+       srcnode = VP_TO_NODE(fncp->nc_vp);
+       dirfs_node_setname(srcnode, tncp->nc_name, tncp->nc_nlen);
+
+       /*
+        * If the rename replaced an existing target, that file has
+        * effectively been unlinked and must be marked as such.
+        */
+       dstvp = tncp->nc_vp;
+       if (dstvp != NULL) {
+               dbg(5, "RENAME2\n");
+               dstnode = VP_TO_NODE(dstvp);
+               cache_unlink(ap->a_tnch);
+               dirfs_node_setpassive(dmp, dstnode, 0);
+               if (dstnode->dn_parent != NULL) {
+                       dirfs_node_drop(dmp, dstnode->dn_parent);
+                       dstnode->dn_parent = NULL;
+               }
+
+               /* Directory link counts can be odd; force it to zero. */
+               dstnode->dn_links = 0;
+               cache_inval_vp(dstvp, CINV_DESTROY);
+       }
+       cache_rename(ap->a_fnch, ap->a_tnch);
+
+out:
+       dirfs_dropfd(dmp, NULL, fpathfree);
+       dirfs_dropfd(dmp, NULL, tpathfree);
+
+       return error;
+}
+
+/*
+ * VOP_NMKDIR: create a directory on the host and attach it to the
+ * namecache entry.
+ *
+ * When the parent node has its own descriptor the directory is created
+ * relative to it by name.  Otherwise dirfs_findfd() supplies a node with a
+ * descriptor and a path, and the name is appended to that path; a result
+ * that does not fit in MAXPATHLEN fails with ENAMETOOLONG rather than
+ * creating a truncated name.  The node returned by dirfs_findfd() is only
+ * dropped after its descriptor is no longer needed.
+ *
+ * A failed mkdirat() returns errno.  After a successful mkdirat() the new
+ * node/vnode is set up with dirfs_alloc_file(); the vnode is installed in
+ * the namecache only when that succeeded.
+ */
+static int
+dirfs_nmkdir(struct vop_nmkdir_args *ap)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp, pdnp, dnp1;
+       struct namecache *ncp;
+       struct vattr *vap;
+       struct vnode *dvp;
+       struct vnode **vpp;
+       char *tmp, *pathfree;
+       char *path;
+       int pfd, error;
+       int extrapath;
+
+       debug_called();
+
+       extrapath = error = 0;
+       dvp = ap->a_dvp;
+       vpp = ap->a_vpp;
+       dmp = VFS_TO_DIRFS(dvp->v_mount);
+       pdnp = VP_TO_NODE(dvp);
+       ncp = ap->a_nch->ncp;
+       vap = ap->a_vap;
+       pathfree = tmp = path = NULL;
+       dnp = dnp1 = NULL;
+
+       dirfs_node_lock(pdnp);
+       if (pdnp->dn_fd != DIRFS_NOFD) {
+               pfd = pdnp->dn_fd;
+               path = ncp->nc_name;
+       } else {
+               dnp1 = dirfs_findfd(dmp, pdnp, &tmp, &pathfree);
+               pfd = dnp1->dn_fd;
+               path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
+               extrapath = 1;
+               /* Refuse to operate on a silently truncated path. */
+               if (ksnprintf(path, MAXPATHLEN, "%s/%s", tmp,
+                   ncp->nc_name) >= MAXPATHLEN)
+                       error = ENAMETOOLONG;
+       }
+
+       if (error == 0) {
+               if (mkdirat(pfd, path, vap->va_mode) != 0) {
+                       error = errno;
+               } else { /* Directory has been made */
+                       error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
+                           vap, O_DIRECTORY);
+                       if (error) {
+                               /*
+                                * NOTE(review): kept from the original;
+                                * confirm whether dirfs_alloc_file()
+                                * already returns an errno value.
+                                */
+                               error = errno;
+                       } else {
+                               /* Only publish a vnode we really got. */
+                               cache_setunresolved(ap->a_nch);
+                               cache_setvp(ap->a_nch, *vpp);
+                       }
+               }
+       }
+
+       /* pfd is no longer used, the node that owns it can be let go. */
+       if (extrapath)
+               dirfs_dropfd(dmp, dnp1, pathfree);
+       dirfs_node_unlock(pdnp);
+
+       if (extrapath)
+               kfree(path, M_DIRFS_MISC);
+
+       KTR_LOG(dirfs_nmkdir, pdnp, dnp, ncp->nc_name, error);
+
+       return error;
+}
+
+/*
+ * VOP_NRMDIR: remove a directory.
+ *
+ * The vnode behind the namecache handle is obtained with cache_vget(); if
+ * that fails the error is returned and nothing is touched.  Anything that
+ * is not a directory is refused with ENOTDIR.  The directory is removed on
+ * the host with rmdir() on the node's absolute path.  On success the
+ * namecache entry is unlinked, the node is taken through
+ * dirfs_node_setpassive(), its reference on the parent is dropped, its
+ * link count is zeroed and the vnode's namecache entries are invalidated.
+ * A failed rmdir() returns errno.
+ *
+ * The whole operation runs with the mount token held.
+ */
+static int
+dirfs_nrmdir(struct vop_nrmdir_args *ap)
+{
+       dirfs_node_t dnp, pdnp;
+       dirfs_mount_t dmp;
+       struct vnode *dvp;
+       struct nchandle *nch;
+       struct mount *mp;
+       struct vnode *vp;
+       int error;
+       char *tmp;
+       char *pathfree;
+
+       debug_called();
+
+       tmp = NULL;
+       vp = NULL;
+       dvp = ap->a_dvp;
+       nch = ap->a_nch;
+
+       mp = dvp->v_mount;
+       dmp = VFS_TO_DIRFS(mp);
+
+       lwkt_gettoken(&mp->mnt_token);
+
+       /* Without a vnode there is nothing to unlock, inspect or release. */
+       error = cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
+       if (error) {
+               lwkt_reltoken(&mp->mnt_token);
+               return error;
+       }
+       vn_unlock(vp);
+
+       pdnp = VP_TO_NODE(dvp);
+       dnp = VP_TO_NODE(vp);
+
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+       } else {
+               tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
+               dirfs_node_lock(pdnp);
+               error = rmdir(tmp);
+               if (error == 0) {
+                       cache_unlink(nch);
+                       dirfs_node_setpassive(dmp, dnp, 0);
+                       if (dnp->dn_parent) {
+                               dirfs_node_drop(dmp, dnp->dn_parent);
+                               dnp->dn_parent = NULL;
+                       }
+
+                       /*
+                        * nlinks on directories can be a bit weird.  Zero
+                        * it out.
+                        */
+                       dnp->dn_links = 0;
+                       cache_inval_vp(vp, CINV_DESTROY);
+               } else {
+                       error = errno;
+               }
+               dirfs_node_unlock(pdnp);
+               dirfs_dropfd(dmp, NULL, pathfree);
+       }
+       vrele(vp);
+       lwkt_reltoken(&mp->mnt_token);
+
+       KTR_LOG(dirfs_nrmdir, dnp, pdnp, error);
+
+       return error;
+}
+
+/*
+ * VOP_NSYMLINK: create a symbolic link on the host and attach it to the
+ * namecache entry.
+ *
+ * The link's path is the parent node's absolute path plus the namecache
+ * name; if that does not fit in MAXPATHLEN the call fails with
+ * ENAMETOOLONG rather than creating a link under a truncated name.  A
+ * failed symlink() returns errno.  After a successful symlink() the new
+ * node/vnode is set up with dirfs_alloc_file(); the vnode is installed in
+ * the namecache only when that succeeded.
+ *
+ * The whole operation runs with the mount token held.
+ */
+static int
+dirfs_nsymlink(struct vop_nsymlink_args *ap)
+{
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp, pdnp;
+       struct mount *mp;
+       struct namecache *ncp;
+       struct vattr *vap;
+       struct vnode *dvp;
+       struct vnode **vpp;
+       char *tmp, *pathfree;
+       char *path;
+       int error;
+       int pathlen;
+
+       debug_called();
+
+       error = 0;
+       dvp = ap->a_dvp;
+       vpp = ap->a_vpp;
+       mp = dvp->v_mount;
+       dmp = VFS_TO_DIRFS(dvp->v_mount);
+       pdnp = VP_TO_NODE(dvp);
+       ncp = ap->a_nch->ncp;
+       vap = ap->a_vap;
+       pathfree = tmp = path = NULL;
+       dnp = NULL;
+
+       lwkt_gettoken(&mp->mnt_token);
+       vap->va_type = VLNK;
+
+       /* Find out the whole path of our new symbolic link */
+       tmp = dirfs_node_absolute_path(dmp, pdnp, &pathfree);
+       path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
+       pathlen = ksnprintf(path, MAXPATHLEN, "%s/%s", tmp, ncp->nc_name);
+       dirfs_dropfd(dmp, NULL, pathfree);
+
+       if (pathlen >= MAXPATHLEN) {
+               /* Refuse to operate on a silently truncated path. */
+               error = ENAMETOOLONG;
+       } else if (symlink(ap->a_target, path) != 0) {
+               error = errno;
+       } else { /* Symlink has been made */
+               error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
+                   NULL, 0);
+               if (error) {
+                       /*
+                        * NOTE(review): kept from the original; confirm
+                        * whether dirfs_alloc_file() already returns an
+                        * errno value.
+                        */
+                       error = errno;
+               } else {
+                       /* Only publish a vnode we really got. */
+                       cache_setunresolved(ap->a_nch);
+                       cache_setvp(ap->a_nch, *vpp);
+               }
+       }
+       dbg(5, "path=%s a_target=%s\n", path, ap->a_target);
+
+       KTR_LOG(dirfs_nsymlink, dnp, ap->a_target, path, error);
+       kfree(path, M_DIRFS_MISC);
+       lwkt_reltoken(&mp->mnt_token);
+
+       return error;
+
+}
+
+/*
+ * VOP_READDIR: return directory entries read from the host directory.
+ *
+ * Returns ENOTDIR when the vnode is not a directory.  The host descriptor
+ * is positioned at the caller's offset and one getdirentries() call fills
+ * a scratch buffer of at most 4096 bytes.  Every entry in that buffer is
+ * handed to vop_write_dirent() until the buffer is exhausted, the caller's
+ * space runs out, or an error occurs.  The caller's offset is then set to
+ * the descriptor's current position.
+ *
+ * When getdirentries() returns 0 the EOF flag is raised, if the caller
+ * supplied one.  When it fails, errno is returned and the offset is left
+ * where it started; EINVAL from the host is treated as a panic.
+ *
+ * Directory cookies are not produced.
+ */
+static int
+dirfs_readdir(struct vop_readdir_args *ap)
+{
+       struct dirent *dp;
+       off_t __unused **cookies = ap->a_cookies;
+       int *ncookies = ap->a_ncookies;
+       int bytes;
+       char *buf;
+       long base;
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio;
+       dirfs_node_t dnp;
+       off_t startoff;
+       int error, r;
+       size_t bufsiz;
+       off_t curoff;
+
+       debug_called();
+
+       if (ncookies)
+               debug(1, "ncookies=%d\n", *ncookies);
+
+       dnp = VP_TO_NODE(vp);
+       uio = ap->a_uio;
+       startoff = uio->uio_offset;
+       error = 0;
+       base = 0;
+       bytes = 0;
+
+       if (vp->v_type != VDIR)
+               return ENOTDIR;
+       if (uio->uio_resid < 0)
+               return EINVAL;
+       if ((bufsiz = uio->uio_resid) > 4096)
+               bufsiz = 4096;
+       buf = kmalloc(bufsiz, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+
+       /*
+        * Generally speaking we have to be able to process ALL the
+        * entries returned by getdirentries() in order for the seek
+        * position to be correct.  For now try to size the buffer
+        * to make this happen.  A smaller buffer always works.  For
+        * now just use an appropriate size.
+        */
+       dirfs_node_lock(dnp);
+       lseek(dnp->dn_fd, startoff, SEEK_SET);
+       bytes = getdirentries(dnp->dn_fd, buf, bufsiz, &base);
+       dbg(5, "seek %016jx %016jx %016jx\n",
+               (intmax_t)startoff, (intmax_t)base,
+               (intmax_t)lseek(dnp->dn_fd, 0, SEEK_CUR));
+       if (bytes < 0) {
+               if (errno == EINVAL)
+                       panic("EINVAL on readdir\n");
+               error = errno;
+               curoff = startoff;
+               goto out;
+       } else if (bytes == 0) {
+               /* The EOF flag pointer is optional. */
+               if (ap->a_eofflag != NULL)
+                       *ap->a_eofflag = 1;
+               curoff = startoff;
+               goto out;
+       }
+
+       /*
+        * Walk the entries.  The loop step accounts for the entry that was
+        * just written and only then moves on to the next one, so no entry
+        * beyond the bytes returned by getdirentries() is ever inspected.
+        */
+       for (dp = (struct dirent *)buf; bytes > 0 && uio->uio_resid > 0;
+           bytes -= _DIRENT_DIRSIZ(dp), dp = _DIRENT_NEXT(dp)) {
+               r = vop_write_dirent(&error, uio, dp->d_ino, dp->d_type,
+                   dp->d_namlen, dp->d_name);
+               if (error || r)
+                       break;
+       }
+       curoff = lseek(dnp->dn_fd, 0, SEEK_CUR);
+
+out:
+       kfree(buf, M_DIRFS_MISC);
+       uio->uio_offset = curoff;
+       dirfs_node_unlock(dnp);
+
+       KTR_LOG(dirfs_readdir, dnp, dnp->dn_fd, startoff, uio->uio_offset);
+
+       return error;
+}
+
+/*
+ * VOP_READLINK: return the contents of a symbolic link.
+ *
+ * The link is read on the host with readlinkat() into a scratch buffer as
+ * large as the caller's remaining space, then exactly the bytes that were
+ * read are copied out.  No terminating NUL is added or copied: readlinkat()
+ * does not produce one, and writing one at buf[nlen] would land outside
+ * the buffer when the link fills it completely.
+ *
+ * Returns errno when readlinkat() fails, otherwise the result of uiomove().
+ * The operation runs with the mount token held.
+ */
+static int
+dirfs_readlink(struct vop_readlink_args *ap)
+{
+       dirfs_node_t dnp, pathnp;
+       dirfs_mount_t dmp;
+       struct vnode *vp;
+       struct mount *mp;
+       struct uio *uio;
+       char *tmp, *pathfree, *buf;
+       ssize_t nlen;
+       int error;
+
+       debug_called();
+
+       vp = ap->a_vp;
+
+       KKASSERT(vp->v_type == VLNK);
+
+       error = 0;
+       tmp = pathfree = NULL;
+       uio = ap->a_uio;
+       mp = vp->v_mount;
+       dmp = VFS_TO_DIRFS(mp);
+       dnp = VP_TO_NODE(vp);
+
+       lwkt_gettoken(&mp->mnt_token);
+
+       pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+
+       /*
+        * Look the link up by the path dirfs_findfd() produced for
+        * pathnp's descriptor, the same way dirfs_nremove() pairs the two.
+        * NOTE(review): the original passed dnp->dn_name here; confirm that
+        * tmp is the path relative to pathnp->dn_fd.
+        */
+       buf = kmalloc(uio->uio_resid, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+       nlen = readlinkat(pathnp->dn_fd, tmp, buf, uio->uio_resid);
+       if (nlen == -1) {
+               error = errno;
+       } else {
+               /* uiomove() returns its own error; errno is not involved. */
+               error = uiomove(buf, (size_t)nlen, uio);
+       }
+       dirfs_dropfd(dmp, pathnp, pathfree);
+       kfree(buf, M_DIRFS_MISC);
+
+       lwkt_reltoken(&mp->mnt_token);
+
+       return error;
+}
+
+/*
+ * VOP_INACTIVE: called when the vnode is no longer in use.
+ *
+ * 1) A vnode with no dirfs node attached is simply recycled.
+ * 2) A node whose link count is 0 has been unlinked, so its vnode is
+ *    recycled as well.
+ * 3) Any other node is handed to dirfs_node_setpassive() in an attempt
+ *    to keep its fd in the fd cache.
+ *
+ * Cases 2 and 3 run with the dirfs mount token held.  Always returns 0.
+ */
+static int
+dirfs_inactive(struct vop_inactive_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       dirfs_mount_t dmp;
+       dirfs_node_t dnp;
+
+       debug_called();
+
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+       dnp = VP_TO_NODE(vp);
+
+       /* Degenerate case */
+       if (dnp == NULL) {
+               dbg(5, "dnp was NULL\n");
+               vrecycle(vp);
+               return 0;
+       }
+
+       dirfs_mount_gettoken(dmp);
+
+       if (dnp->dn_links == 0) {
+               /* No links left: the file was unlinked. */
+               vrecycle(vp);
+               dbg(5, "recycled a vnode of an unlinked dnp\n");
+       } else {
+               /* Try to retain the fd in our fd cache. */
+               dirfs_node_setpassive(dmp, dnp, 1);
+       }
+
+       dirfs_mount_reltoken(dmp);
+
+       return 0;
+}
+
+/*
+ * VOP_RECLAIM: detach the dirfs node from the vnode being reclaimed by
+ * passing both the mount and the node to dirfs_free_vp().  Always
+ * returns 0.
+ */
+int
+dirfs_reclaim(struct vop_reclaim_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       dirfs_node_t dnp;
+       dirfs_mount_t dmp;
+
+       debug_called();
+
+       dnp = VP_TO_NODE(vp);
+       dmp = VFS_TO_DIRFS(vp->v_mount);
+
+       /* The node may be destroyed by this call; do not use it after. */
+       dirfs_free_vp(dmp, dnp);
+
+       return (0);
+}
+
+/*
+ * VOP_MOUNTCTL is not implemented: the call is traced and rejected with
+ * EOPNOTSUPP.
+ */
+static int
+dirfs_mountctl(struct vop_mountctl_args *ap)
+{
+       const int error = EOPNOTSUPP;
+
+       debug_called();
+
+       KTR_LOG(dirfs_unsupported, __func__);
+
+       return (error);
+}
+
+/*
+ * VOP_PRINT is not implemented: the call is traced and rejected with
+ * EOPNOTSUPP.
+ */
+static int
+dirfs_print(struct vop_print_args *v)
+{
+       const int error = EOPNOTSUPP;
+
+       debug_called();
+
+       KTR_LOG(dirfs_unsupported, __func__);
+
+       return (error);
+}
+
+/*
+ * Placeholder pathconf handler that rejects every request with
+ * EOPNOTSUPP.  It is marked __unused.
+ */
+static int __unused
+dirfs_pathconf(struct vop_pathconf_args *v)
+{
+       const int error = EOPNOTSUPP;
+
+       debug_called();
+
+       return (error);
+}
+
+/*
+ * VOP_KQFILTER is not implemented: the call is traced and rejected with
+ * EOPNOTSUPP.
+ */
+static int
+dirfs_kqfilter (struct vop_kqfilter_args *ap)
+{
+       const int error = EOPNOTSUPP;
+
+       debug_called();
+
+       KTR_LOG(dirfs_unsupported, __func__);
+
+       return (error);
+}
+
+/*
+ * Vnode operations vector for dirfs.  Entries are grouped by purpose;
+ * anything not listed goes to vop_defaultop.
+ */
+struct vop_ops dirfs_vnode_vops = {
+       .vop_default =                  vop_defaultop,
+
+       /* Name resolution and namespace changes */
+       .vop_nresolve =                 dirfs_nresolve,
+       .vop_nlookupdotdot =            dirfs_nlookupdotdot,
+       .vop_ncreate =                  dirfs_ncreate,
+       .vop_nmknod =                   dirfs_nmknod,
+       .vop_nmkdir =                   dirfs_nmkdir,
+       .vop_nsymlink =                 dirfs_nsymlink,
+       .vop_nlink =                    dirfs_nlink,
+       .vop_nrename =                  dirfs_nrename,
+       .vop_nremove =                  dirfs_nremove,
+       .vop_nrmdir =                   dirfs_nrmdir,
+       .vop_nwhiteout =                vop_compat_nwhiteout,
+
+       /* Open/close and attributes */
+       .vop_open =                     dirfs_open,
+       .vop_close =                    dirfs_close,
+       .vop_access =                   dirfs_access,
+       .vop_getattr =                  dirfs_getattr,
+       .vop_setattr =                  dirfs_setattr,
+       .vop_markatime =                vop_stdmarkatime,
+       .vop_pathconf =                 vop_stdpathconf,
+
+       /* Data, directory and link contents */
+       .vop_read =                     dirfs_read,
+       .vop_write =                    dirfs_write,
+       .vop_fsync =                    dirfs_fsync,
+       .vop_readdir =                  dirfs_readdir,
+       .vop_readlink =                 dirfs_readlink,
+       .vop_bmap =                     dirfs_bmap,
+       .vop_strategy =                 dirfs_strategy,
+       .vop_getpages =                 vop_stdgetpages,
+       .vop_putpages =                 vop_stdputpages,
+
+       /* Locking, events and vnode life cycle */
+       .vop_advlock =                  dirfs_advlock,
+       .vop_kqfilter =                 dirfs_kqfilter,
+       .vop_inactive =                 dirfs_inactive,
+       .vop_reclaim =                  dirfs_reclaim,
+       .vop_mountctl =                 dirfs_mountctl,
+       .vop_print =                    dirfs_print
+};