sys/vfs/fuse: Add initial FUSE support
authorTomohiro Kusumi <kusumi.tomohiro@gmail.com>
Sun, 31 Mar 2019 16:30:07 +0000 (01:30 +0900)
committerTomohiro Kusumi <kusumi.tomohiro@gmail.com>
Sun, 31 Mar 2019 17:04:18 +0000 (02:04 +0900)
The basic code design comes from FreeBSD, but the code is written
from scratch. It was just easier to write from scratch than trying to
port sys/fs/fuse/* in FreeBSD for various reasons. Note that this is
to implement FUSE API/ABI, but not to be compatible with FreeBSD
implementation which contains FreeBSD specific sysctls, etc.

The initial version doesn't support FUSE_WRITE: VOP_WRITE() is disabled
and simply returns EOPNOTSUPP. The write code currently works with simple
write(2) calls like dd(1) via direct I/O, but not when the syncer thread
or mmap(2) gets involved under non-trivial conditions. It looks to
be doable with custom VOP_GETPAGES() and VOP_PUTPAGES(), but if not,
it will require some changes to sys/kern/* and sys/vm/* to properly
support writes.

Besides the above, this initial version supports basic FUSE operations
invoked from file-related system calls via FUSE VOPs, but not things
like FUSE_IOCTL, FUSE_POLL, FUSE_FALLOCATE, etc. Although dmesg says
FUSE 7.28, don't expect it to support everything 7.28 (or anywhere
close to 7.28) says it has.

FUSE will be dropped from DragonFly releases until it gets stabilized
to a certain extent, including the above, at least for write support.

24 files changed:
etc/mtree/BSD.include.dist
include/Makefile
sbin/Makefile
sbin/mount_fuse/Makefile [new file with mode: 0644]
sbin/mount_fuse/mount_fusefs.8 [new file with mode: 0644]
sbin/mount_fuse/mount_fusefs.c [new file with mode: 0644]
sys/conf/files
sys/conf/options
sys/kern/vfs_vnops.c
sys/sys/vfscache.h
sys/vfs/Makefile
sys/vfs/fuse/Makefile [new file with mode: 0644]
sys/vfs/fuse/fuse.h [new file with mode: 0644]
sys/vfs/fuse/fuse_abi.h [new file with mode: 0644]
sys/vfs/fuse/fuse_debug.h [new file with mode: 0644]
sys/vfs/fuse/fuse_device.c [new file with mode: 0644]
sys/vfs/fuse/fuse_file.c [new file with mode: 0644]
sys/vfs/fuse/fuse_io.c [new file with mode: 0644]
sys/vfs/fuse/fuse_ipc.c [new file with mode: 0644]
sys/vfs/fuse/fuse_mount.h [new file with mode: 0644]
sys/vfs/fuse/fuse_node.c [new file with mode: 0644]
sys/vfs/fuse/fuse_util.c [new file with mode: 0644]
sys/vfs/fuse/fuse_vfsops.c [new file with mode: 0644]
sys/vfs/fuse/fuse_vnops.c [new file with mode: 0644]

index 9bd44e2..0b8fb2d 100644 (file)
         ..
         ufs
         ..
+        fuse
+        ..
     ..
     vm
     ..
index 8d53d7c..beacb53 100644 (file)
@@ -89,7 +89,7 @@ LSUBDIRS= \
        vfs/isofs/cd9660 \
        vfs/msdosfs vfs/nfs vfs/ntfs \
        vfs/smbfs vfs/udf vfs/ufs vfs/hammer vfs/hammer2 \
-       vfs/autofs vfs/tmpfs
+       vfs/autofs vfs/fuse vfs/tmpfs
 
 # For SHARED=symlinks, bus/cam is a symlink, so cam/scsi is taken care of
 LSYMSUBDIRS=   ${LSUBDIRS:Nbus/cam/scsi:Nnet/*:Nnetgraph/*:Nnetgraph7/*}
index fd12f5a..29b15cd 100644 (file)
@@ -62,6 +62,7 @@ SUBDIR=       adjkerntz \
        mount_tmpfs \
        mount_udf \
        mount_autofs \
+       mount_fuse \
        mountd \
        natacontrol \
        natd \
diff --git a/sbin/mount_fuse/Makefile b/sbin/mount_fuse/Makefile
new file mode 100644 (file)
index 0000000..e79146a
--- /dev/null
@@ -0,0 +1,11 @@
+PROG=  mount_fusefs
+SRCS=  mount_fusefs.c
+
+CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../../sys
+
+MAN=   mount_fusefs.8
+
+DPADD= ${LIBUTIL}
+LDADD= -lutil
+
+.include <bsd.prog.mk>
diff --git a/sbin/mount_fuse/mount_fusefs.8 b/sbin/mount_fuse/mount_fusefs.8
new file mode 100644 (file)
index 0000000..aae4ca5
--- /dev/null
@@ -0,0 +1,54 @@
+.\" Copyright (c) 2019 The DragonFly Project
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd March 25, 2019
+.Dt MOUNT_FUSEFS 8
+.Os
+.Sh NAME
+.Nm mount_fusefs
+.Nd mount a FUSE file system
+.Sh SYNOPSIS
+.Nm
+.Op Fl o Ar options
+.Op Fl h
+.Ar special
+.Ar node
+.Sh DESCRIPTION
+The
+.Nm
+utility mounts a
+.Nm FUSE
+file system backed by
+.Ar special
+file at mount point
+.Ar node .
+.Sh SEE ALSO
+.Xr mount 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Dx 5.5 .
+.Sh AUTHORS
+.An Tomohiro Kusumi Aq Mt tkusumi@netbsd.org
diff --git a/sbin/mount_fuse/mount_fusefs.c b/sbin/mount_fuse/mount_fusefs.c
new file mode 100644 (file)
index 0000000..cce7b09
--- /dev/null
@@ -0,0 +1,193 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vfs/fuse/fuse_mount.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <mntopts.h>
+#include <err.h>
+
+#define MOPT_FUSE_LINUX_OPTS \
+       { "default_permissions", 0, FUSE_MOUNT_DEFAULT_PERMISSIONS, 1 }, \
+       { "allow_other", 0, FUSE_MOUNT_ALLOW_OTHER, 1 }, \
+       { "max_read=", 0, FUSE_MOUNT_MAX_READ, 1 }, \
+       { "subtype=", 0, FUSE_MOUNT_SUBTYPE, 1 }
+
+/* XXX */
+#define MOPT_FUSE_LINUX_IGNORE_OPTS \
+       { "fsname=", 0, 0, 1 }, \
+       { "fd=", 0, 0, 1 }, \
+       { "rootmode=", 0, 0, 1 }, \
+       { "user_id=", 0, 0, 1 }, \
+       { "group_id=", 0, 0, 1 }, \
+       \
+       { "auto_unmount", 0, 0, 1 }, \
+       { "blkdev", 0, 0, 1 }, \
+       { "blksize=", 0, 0, 1 }, \
+       { "context=", 0, 0, 1 }, \
+       { "fscontext=", 0, 0, 1 }, \
+       { "defcontext=", 0, 0, 1 }, \
+       { "rootcontext=", 0, 0, 1 }, \
+       { "user=", 0, 0, 1 }, \
+       { "-r", 0, 0, 1 }, \
+       { "ro", 0, 0, 1 }, \
+       { "rw", 0, 0, 1 }, \
+       { "suid", 0, 0, 1 }, \
+       { "nosuid", 0, 0, 1 }, \
+       { "dev", 0, 0, 1 }, \
+       { "nodev", 0, 0, 1 }, \
+       { "exec", 0, 0, 1 }, \
+       { "noexec", 0, 0, 1 }, \
+       { "async", 0, 0, 1 }, \
+       { "sync", 0, 0, 1 }, \
+       { "dirsync", 0, 0, 1 }, \
+       { "atime", 0, 0, 1 }, \
+       { "noatime", 0, 0, 1 }
+
+static struct mntopt mopts[] = { /* option table handed to getmntopts() in main() */
+       MOPT_FUSE_LINUX_OPTS,
+       MOPT_FUSE_LINUX_IGNORE_OPTS, /* entries defined above with both flag fields 0 */
+       MOPT_STDOPTS,
+       MOPT_NULL
+};
+
+/*
+ * Print the command synopsis to stderr and terminate with exit status 1.
+ * Never returns.
+ */
+static void
+usage(void)
+{
+       fputs("usage: mount_fusefs [-o options] fd mountpoint\n", stderr);
+       exit(1);
+}
+
+/*
+ * Return a heap-allocated copy of the option value that starts at ptr,
+ * i.e. the text up to (but not including) the next ',' or the end of the
+ * string, whichever comes first.  The caller owns the result and must
+ * free(3) it.
+ *
+ * Exits via err(3) if the copy cannot be allocated, so the result is
+ * never NULL.
+ */
+static char*
+get_optval(const char *ptr)
+{
+       char *ret, *end;
+
+       ret = strdup(ptr);
+       if (ret == NULL)
+               err(1, "strdup");
+
+       /* Cut the copy at the separator that starts the next option. */
+       end = strchr(ret, ',');
+       if (end != NULL)
+               *end = '\0';
+
+       return ret;
+}
+
+/*
+ * Mount a FUSE file system:
+ *   mount_fusefs [-o options] fd mountpoint
+ *
+ * fd is the number of a descriptor that is already open in this process;
+ * it is fstat(2)ed and must resolve to a device whose path starts with
+ * /dev/fuse.  Options are parsed with getmntopts(3) against mopts[];
+ * the values of max_read= and subtype= are additionally extracted into
+ * the mount arguments.  The "fuse" VFS is loaded on demand.
+ *
+ * Returns 0 on success; every failure exits with status 1 through
+ * err(3)/errx(3) or usage().
+ *
+ * e.g.
+ * argv[0] = "mount_fusefs"
+ * argv[1] = "-o"
+ * argv[2] = "max_read=...,subtype=hello"
+ * argv[3] = "3"
+ * argv[4] = "/mnt/fuse"
+ * argv[5] = "(null)"
+ */
+int
+main(int argc, char **argv)
+{
+       struct fuse_mount_info args;
+       struct vfsconf vfc;
+       struct stat st;
+       const char *fdstr, *mntpt;
+       char *ep, mntpath[MAXPATHLEN], fusedev[64];
+       long lfd;
+       int error, c, fd, mntflags;
+
+       mntflags = 0;
+       memset(&args, 0, sizeof(args));
+
+       while ((c = getopt_long(argc, argv, "ho:", NULL, NULL)) != -1) {
+               switch(c) {
+               case 'o':
+                       getmntopts(optarg, mopts, &mntflags, &args.flags);
+                       if (args.flags & FUSE_MOUNT_MAX_READ) {
+                               char *p = strstr(optarg, "max_read=");
+                               if (p) {
+                                       p = get_optval(p + 9);
+                                       if (p == NULL)
+                                               err(1, "strdup");
+                                       /* Reject empty or non-numeric values. */
+                                       args.max_read = strtol(p, &ep, 0);
+                                       if (ep == p || *ep != '\0')
+                                               errx(1, "Invalid max_read %s", p);
+                                       free(p);
+                               }
+                       }
+                       if (args.flags & FUSE_MOUNT_SUBTYPE) {
+                               char *p = strstr(optarg, "subtype=");
+                               if (p) {
+                                       /*
+                                        * get_optval() already returns a
+                                        * private copy; keep it as is.
+                                        */
+                                       args.subtype = get_optval(p + 8);
+                                       if (args.subtype == NULL)
+                                               err(1, "strdup");
+                               }
+                       }
+                       break;
+               case 'h':
+               default:
+                       usage(); /* exit */
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc < 2)
+               usage();
+
+       fdstr = argv[0];
+       mntpt = argv[1];
+       checkpath(mntpt, mntpath);
+
+       /*
+        * Parse into a long first so that a value which does not fit in an
+        * int is rejected instead of being silently truncated.  errno is
+        * not meaningful for a malformed number, hence errx() not err().
+        */
+       lfd = strtol(fdstr, &ep, 10);
+       fd = (int)lfd;
+       if (ep == fdstr || *ep != '\0' || lfd <= 0 || lfd != fd)
+               errx(1, "Invalid FUSE fd %s", fdstr);
+
+       if (fstat(fd, &st) == -1)
+               err(1, "Failed to stat FUSE fd %d", fd);
+       strcpy(fusedev, "/dev/");
+       devname_r(st.st_rdev, S_IFCHR, fusedev + strlen(fusedev),
+               sizeof(fusedev) - strlen(fusedev));
+       if (stat(fusedev, &st) == -1)
+               err(1, "Failed to stat FUSE device %s", fusedev);
+       /* A name mismatch is not a system error, so no errno text. */
+       if (strncmp(fusedev, "/dev/fuse", 9))
+               errx(1, "Invalid FUSE device %s", fusedev);
+       args.fd = fd;
+       args.from = strdup(fusedev);
+       if (args.from == NULL)
+               err(1, "strdup");
+
+       error = getvfsbyname("fuse", &vfc);
+       if (error && vfsisloadable("fuse")) {
+               if(vfsload("fuse"))
+                       err(1, "vfsload(%s)", "fuse");
+               endvfsent();
+               error = getvfsbyname("fuse", &vfc);
+       }
+       if (error)
+               errx(1, "%s filesystem not available", "fuse");
+
+       if (mount(vfc.vfc_name, mntpath, mntflags, &args) == -1)
+               err(1, "mount");
+
+       return 0;
+}
index 656c209..a83df19 100644 (file)
@@ -2010,6 +2010,14 @@ vfs/tmpfs/tmpfs_vnops.c          optional tmpfs
 vfs/autofs/autofs.c            optional autofs
 vfs/autofs/autofs_vfsops.c     optional autofs
 vfs/autofs/autofs_vnops.c      optional autofs
+vfs/fuse/fuse_device.c         optional fuse
+vfs/fuse/fuse_file.c           optional fuse
+vfs/fuse/fuse_io.c             optional fuse
+vfs/fuse/fuse_ipc.c            optional fuse
+vfs/fuse/fuse_node.c           optional fuse
+vfs/fuse/fuse_util.c           optional fuse
+vfs/fuse/fuse_vfsops.c         optional fuse
+vfs/fuse/fuse_vnops.c          optional fuse
 #
 vm/default_pager.c             standard
 vm/device_pager.c              standard
index 0e3e943..8ad5196 100644 (file)
@@ -101,6 +101,7 @@ SMBFS               opt_dontuse.h
 TMPFS          opt_dontuse.h
 UDF            opt_dontuse.h
 AUTOFS         opt_dontuse.h
+FUSE           opt_dontuse.h
 
 # These static filesystems has one slightly bogus static dependency in
 # sys/platform/.../x86_64/autoconf.c.  If any of these filesystems are
index 8b71e0a..74d5606 100644 (file)
@@ -169,6 +169,7 @@ again:
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
+                       vap->va_fuseflags = fmode; /* FUSE */
                        if (fmode & O_EXCL)
                                vap->va_vaflags |= VA_EXCLUSIVE;
                        error = VOP_NCREATE(&nd->nl_nch, nd->nl_dvp, &vp,
index 3bdab21..c0b9ee5 100644 (file)
@@ -108,7 +108,7 @@ enum vtagtype       {
        VT_UNUSED7, VT_UNUSED8, VT_NULL, VT_UNUSED10, VT_UNUSED11, VT_PROCFS,
        VT_UNUSED13, VT_ISOFS, VT_UNUSED15, VT_MSDOSFS, VT_UNUSED17, VT_VFS,
        VT_UNUSED19, VT_NTFS, VT_HPFS, VT_SMBFS, VT_UDF, VT_EXT2FS, VT_SYNTH,
-       VT_HAMMER, VT_HAMMER2, VT_DEVFS, VT_TMPFS, VT_AUTOFS
+       VT_HAMMER, VT_HAMMER2, VT_DEVFS, VT_TMPFS, VT_AUTOFS, VT_FUSE
 };
 
 /*
@@ -138,7 +138,8 @@ struct vattr {
        u_quad_t        va_filerev;     /* file modification number */
        u_int           va_vaflags;     /* operations flags, see below */
        long            va_spare;       /* remain quad aligned */
-       int64_t         va_unused01;
+       uint32_t        va_fuseflags;   /* used by FUSE */
+       uint32_t        va_unused01;
        uuid_t          va_uid_uuid;    /* native uuids if available */
        uuid_t          va_gid_uuid;
        uuid_t          va_fsid_uuid;
index 5459457..b8e971d 100644 (file)
@@ -3,7 +3,7 @@
 
 SUBDIR=        fifofs msdosfs nfs procfs \
        hpfs ntfs smbfs isofs mfs udf \
-       nullfs hammer tmpfs autofs
+       nullfs hammer tmpfs autofs fuse
 
 SUBDIR+= hammer2
 
diff --git a/sys/vfs/fuse/Makefile b/sys/vfs/fuse/Makefile
new file mode 100644 (file)
index 0000000..f8f2f7c
--- /dev/null
@@ -0,0 +1,4 @@
+KMOD=  fuse
+SRCS=  fuse_vfsops.c fuse_vnops.c fuse_device.c fuse_node.c fuse_file.c fuse_ipc.c fuse_io.c fuse_util.c
+
+.include <bsd.kmod.mk>
diff --git a/sys/vfs/fuse/fuse.h b/sys/vfs/fuse/fuse.h
new file mode 100644 (file)
index 0000000..6faa64b
--- /dev/null
@@ -0,0 +1,287 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FUSE_FUSE_H
+#define FUSE_FUSE_H
+
+#ifndef INVARIANTS
+#define INVARIANTS
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/objcache.h>
+#include <sys/proc.h>
+#include <sys/thread.h>
+#include <sys/mutex.h>
+#include <sys/mutex2.h>
+#include <sys/refcount.h>
+#include <sys/event.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/ucred.h>
+#include <sys/unistd.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+#include <sys/tree.h>
+#include <machine/atomic.h>
+
+#include "fuse_debug.h"
+#include "fuse_mount.h"
+#include "fuse_abi.h"
+
+#define VFSTOFUSE(mp) ((struct fuse_mount*)((mp)->mnt_data))
+#define VTOI(vp) ((struct fuse_node*)((vp)->v_data))
+
+#define FUSE_BLKSIZE PAGE_SIZE
+#define FUSE_BLKMASK (FUSE_BLKSIZE - 1)
+#define FUSE_BLKMASK64 ((off_t)(FUSE_BLKSIZE - 1))
+
+SYSCTL_DECL(_vfs_fuse);
+
+extern int fuse_debug;
+extern struct vop_ops fuse_vnode_vops;
+extern struct vop_ops fuse_spec_vops;
+
+struct fuse_mount { /* per-mount FUSE state; VFSTOFUSE() casts mp->mnt_data to this */
+       struct mount *mp;
+       struct vnode *devvp;
+       struct ucred *cred;
+       struct kqinfo kq;
+       struct fuse_node *rfnp; /* NOTE(review): presumably the root node -- confirm */
+       struct mtx mnt_lock;
+       struct mtx ipc_lock;
+       TAILQ_HEAD(,fuse_ipc) request_head; /* list of fuse_ipc linked by request_entry */
+       TAILQ_HEAD(,fuse_ipc) reply_head; /* list of fuse_ipc linked by reply_entry */
+
+       unsigned int refcnt;
+       unsigned long unique;
+       int dead; /* accessed atomically via fuse_test_dead()/fuse_set_dead() */
+       uint64_t nosys; /* bitmask indexed by op; see fuse_test_nosys()/fuse_set_nosys() */
+       uint32_t abi_major;
+       uint32_t abi_minor;
+       uint32_t max_write;
+};
+
+RB_HEAD(fuse_dent_tree, fuse_dent);
+
+struct fuse_node { /* per-vnode FUSE state; VTOI() casts vp->v_data to this */
+       struct vnode *vp;
+       struct vattr attr;
+       struct fuse_mount *fmp;
+       struct fuse_node *pfnp; /* NOTE(review): presumably the parent node -- confirm */
+       struct mtx node_lock;
+       struct fuse_dent_tree dent_head; /* head of the RB tree of struct fuse_dent */
+
+       uint64_t ino;
+       enum vtype type;
+       int nlink;
+       size_t size;
+       uint64_t nlookup;
+       uint64_t fh;
+       bool closed; /* XXX associated with closed fh */
+};
+
+struct fuse_dent { /* named entry that refers to a fuse_node; element type of fuse_dent_tree */
+       struct fuse_node *fnp;
+       RB_ENTRY(fuse_dent) dent_entry; /* red-black tree linkage */
+
+       char *name;
+};
+
+struct fuse_buf { /* pointer/length pair; used for the request and reply of struct fuse_ipc */
+       void *buf;
+       size_t len;
+};
+
+struct fuse_ipc { /* one request/reply exchange belonging to a fuse_mount */
+       struct fuse_mount *fmp;
+       struct fuse_buf request; /* read through fuse_in()/fuse_in_size() */
+       struct fuse_buf reply; /* read through fuse_out()/fuse_out_size() */
+       TAILQ_ENTRY(fuse_ipc) request_entry;
+       TAILQ_ENTRY(fuse_ipc) reply_entry;
+
+       unsigned int refcnt;
+       uint64_t unique;
+       int done; /* accessed atomically via the fuse_ipc_*replied() helpers */
+};
+
+int fuse_cmp_version(struct fuse_mount*, uint32_t, uint32_t);
+int fuse_mount_kill(struct fuse_mount*);
+int fuse_mount_free(struct fuse_mount*);
+
+int fuse_device_init(void);
+void fuse_device_cleanup(void);
+
+void fuse_node_new(struct fuse_mount*, uint64_t, enum vtype,
+    struct fuse_node**);
+void fuse_node_free(struct fuse_node*);
+void fuse_dent_new(struct fuse_node*, const char*, int, struct fuse_dent**);
+void fuse_dent_free(struct fuse_dent*);
+void fuse_dent_attach(struct fuse_node*, struct fuse_dent*);
+void fuse_dent_detach(struct fuse_node*, struct fuse_dent*);
+int fuse_dent_find(struct fuse_node*, const char*, int, struct fuse_dent**);
+int fuse_alloc_node(struct fuse_node*, uint64_t, const char*, int, enum vtype,
+    struct vnode**);
+int fuse_node_vn(struct fuse_node*, int, struct vnode**);
+int fuse_node_truncate(struct fuse_node*, size_t, size_t);
+void fuse_node_init(void);
+void fuse_node_cleanup(void);
+
+uint64_t fuse_fh(struct file*);
+void fuse_get_fh(struct file*, uint64_t);
+void fuse_put_fh(struct file*);
+uint64_t fuse_nfh(struct fuse_node*);
+void fuse_get_nfh(struct fuse_node*, uint64_t);
+void fuse_put_nfh(struct fuse_node*);
+void fuse_file_init(void);
+void fuse_file_cleanup(void);
+
+void fuse_buf_alloc(struct fuse_buf*, size_t);
+void fuse_buf_free(struct fuse_buf*);
+struct fuse_ipc *fuse_ipc_get(struct fuse_mount*, size_t);
+void fuse_ipc_put(struct fuse_ipc*);
+void *fuse_ipc_fill(struct fuse_ipc*, int, uint64_t, struct ucred*);
+int fuse_ipc_tx(struct fuse_ipc*);
+void fuse_ipc_init(void);
+void fuse_ipc_cleanup(void);
+
+int fuse_read(struct vop_read_args*);
+int fuse_write(struct vop_write_args*);
+int fuse_dio_write(struct vop_write_args*);
+
+void fuse_hexdump(const char*, size_t);
+void fuse_fill_in_header(struct fuse_in_header*, uint32_t, uint32_t, uint64_t,
+    uint64_t, uint32_t, uint32_t, uint32_t);
+int fuse_forget_node(struct fuse_mount*, uint64_t, uint64_t, struct ucred*);
+int fuse_audit_length(struct fuse_in_header*, struct fuse_out_header*);
+const char *fuse_get_ops(int);
+
+/*
+ * Return the current value of fmp->dead, read with an acquire load.
+ */
+static __inline int
+fuse_test_dead(struct fuse_mount *fmp)
+{
+       int dead;
+
+       dead = atomic_load_acq_int(&fmp->dead);
+       return dead;
+}
+
+/*
+ * Set fmp->dead to 1 with a release store.
+ */
+static __inline void
+fuse_set_dead(struct fuse_mount *fmp)
+{
+       int *deadp = &fmp->dead;
+
+       atomic_store_rel_int(deadp, 1);
+}
+
+/*
+ * Test whether bit 'op' is set in the 64-bit fmp->nosys mask.
+ * Returns 1 if the bit is set and 0 otherwise.
+ *
+ * The mask is built as a 64-bit value: a plain (1 << op) is an int shift,
+ * which is not valid for op >= 31.  The result is normalized to 0/1
+ * because returning the raw 64-bit AND as an int would discard bits
+ * 32-63 and report them as clear.
+ *
+ * op must be in the range 0..63.
+ */
+static __inline int
+fuse_test_nosys(struct fuse_mount *fmp, int op)
+{
+       const uint64_t mask = (uint64_t)1 << op;
+
+       return (atomic_load_acq_64(&fmp->nosys) & mask) != 0;
+}
+
+/*
+ * Atomically set bit 'op' in the 64-bit fmp->nosys mask.
+ *
+ * The mask is built as a 64-bit value: a plain (1 << op) is an int shift,
+ * which is not valid for op >= 31 (and for op == 31 would typically
+ * sign-extend and set bits 31-63 at once).
+ *
+ * op must be in the range 0..63.
+ */
+static __inline void
+fuse_set_nosys(struct fuse_mount *fmp, int op)
+{
+       const uint64_t mask = (uint64_t)1 << op;
+
+       atomic_set_64(&fmp->nosys, mask);
+}
+
+/*
+ * Return the current value of fip->done, read with an acquire load.
+ */
+static __inline int
+fuse_ipc_test_replied(struct fuse_ipc *fip)
+{
+       int done;
+
+       done = atomic_load_acq_int(&fip->done);
+       return done;
+}
+
+/*
+ * Set fip->done to 1 with a release store.
+ */
+static __inline void
+fuse_ipc_set_replied(struct fuse_ipc *fip)
+{
+       int *donep = &fip->done;
+
+       atomic_store_rel_int(donep, 1);
+}
+
+/*
+ * Atomically compare-and-set fip->done from 0 to 1 and return whatever
+ * atomic_cmpset_int() returns.
+ * NOTE(review): presumably nonzero only for the caller that performed
+ * the 0 -> 1 transition -- confirm against atomic(9).
+ */
+static __inline int
+fuse_ipc_test_and_set_replied(struct fuse_ipc *fip)
+{
+       int swapped;
+
+       swapped = atomic_cmpset_int(&fip->done, 0, 1);
+       return swapped;
+}
+
+/*
+ * Return the start of the request buffer of fip.
+ */
+static __inline void*
+fuse_in(struct fuse_ipc *fip)
+{
+       struct fuse_buf *req = &fip->request;
+
+       return req->buf;
+}
+
+/*
+ * Return the length recorded for the request buffer of fip.
+ */
+static __inline size_t
+fuse_in_size(struct fuse_ipc *fip)
+{
+       struct fuse_buf *req = &fip->request;
+
+       return req->len;
+}
+
+/*
+ * Return the address immediately following one struct fuse_in_header at
+ * the start of the request buffer.
+ */
+static __inline void*
+fuse_in_data(struct fuse_ipc *fip)
+{
+       struct fuse_in_header *hdr = fuse_in(fip);
+
+       return hdr + 1;
+}
+
+/*
+ * Return the request length minus sizeof(struct fuse_in_header).
+ * No check is made here that the request is at least header-sized.
+ */
+static __inline size_t
+fuse_in_data_size(struct fuse_ipc *fip)
+{
+       const size_t total = fuse_in_size(fip);
+
+       return total - sizeof(struct fuse_in_header);
+}
+
+/*
+ * Return the start of the reply buffer of fip.
+ */
+static __inline void*
+fuse_out(struct fuse_ipc *fip)
+{
+       struct fuse_buf *rep = &fip->reply;
+
+       return rep->buf;
+}
+
+/*
+ * Return the length recorded for the reply buffer of fip.
+ */
+static __inline size_t
+fuse_out_size(struct fuse_ipc *fip)
+{
+       struct fuse_buf *rep = &fip->reply;
+
+       return rep->len;
+}
+
+/*
+ * Return the address immediately following one struct fuse_out_header at
+ * the start of the reply buffer.
+ */
+static __inline void*
+fuse_out_data(struct fuse_ipc *fip)
+{
+       struct fuse_out_header *hdr = fuse_out(fip);
+
+       return hdr + 1;
+}
+
+/*
+ * Return the reply length minus sizeof(struct fuse_out_header).
+ * No check is made here that the reply is at least header-sized.
+ */
+static __inline size_t
+fuse_out_data_size(struct fuse_ipc *fip)
+{
+       const size_t total = fuse_out_size(fip);
+
+       return total - sizeof(struct fuse_out_header);
+}
+
+/*
+ * Post 'flags' through KNOTE() on the note list embedded in vp's
+ * v_pollinfo.  Does nothing when flags is 0.
+ */
+static __inline void
+fuse_knote(struct vnode *vp, int flags)
+{
+       if (flags == 0)
+               return;
+
+       KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
+}
+
+#endif /* FUSE_FUSE_H */
diff --git a/sys/vfs/fuse/fuse_abi.h b/sys/vfs/fuse/fuse_abi.h
new file mode 100644 (file)
index 0000000..b4967d4
--- /dev/null
@@ -0,0 +1,820 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+    This file defines the kernel interface of FUSE
+    Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
+
+    This program can be distributed under the terms of the GNU GPL.
+    See the file COPYING.
+
+    This -- and only this -- header file may also be distributed under
+    the terms of the BSD Licence as follows:
+
+    Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+*/
+
+/*
+ * This file defines the kernel interface of FUSE
+ *
+ * Protocol changelog:
+ *
+ * 7.9:
+ *  - new fuse_getattr_in input argument of GETATTR
+ *  - add lk_flags in fuse_lk_in
+ *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
+ *  - add blksize field to fuse_attr
+ *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
+ *
+ * 7.11
+ *  - add IOCTL message
+ *  - add unsolicited notification support
+ *  - add POLL message and NOTIFY_POLL notification
+ *
+ * 7.12
+ *  - add umask flag to input argument of open, mknod and mkdir
+ *  - add notification messages for invalidation of inodes and
+ *    directory entries
+ *
+ * 7.13
+ *  - make max number of background requests and congestion threshold
+ *    tunables
+ *
+ * 7.14
+ *  - add splice support to fuse device
+ *
+ * 7.15
+ *  - add store notify
+ *  - add retrieve notify
+ *
+ * 7.16
+ *  - add BATCH_FORGET request
+ *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ *  - add FUSE_IOCTL_DIR flag
+ *  - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ *  - add FUSE_FALLOCATE
+ *
+ * 7.20
+ *  - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ *  - add FUSE_READDIRPLUS
+ *  - send the requested events in POLL request
+ *
+ * 7.22
+ *  - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ *  - add FUSE_WRITEBACK_CACHE
+ *  - add time_gran to fuse_init_out
+ *  - add reserved space to fuse_init_out
+ *  - add FATTR_CTIME
+ *  - add ctime and ctimensec to fuse_setattr_in
+ *  - add FUSE_RENAME2 request
+ *  - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ *  7.24
+ *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ *  7.25
+ *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_HANDLE_KILLPRIV
+ *  - add FUSE_POSIX_ACL
+ *
+ *  7.27
+ *  - add FUSE_ABORT_ERROR
+ *
+ *  7.28
+ *  - add FUSE_COPY_FILE_RANGE
+ *  - add FOPEN_CACHE_DIR
+ *  - add FUSE_MAX_PAGES, add max_pages to init_out
+ *  - add FUSE_CACHE_SYMLINKS
+ */
+
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
+
+/** Version number of this interface */
+#define FUSE_KERNEL_VERSION 7
+
+/** Minor version number of this interface */
+#define FUSE_KERNEL_MINOR_VERSION 28
+
+/** The node ID of the root inode */
+#define FUSE_ROOT_ID 1
+
+/* Make sure all structures are padded to 64bit boundary, so 32bit
+   userspace works under 64bit kernels */
+
+struct fuse_attr {
+       uint64_t        ino;
+       uint64_t        size;
+       uint64_t        blocks;
+       uint64_t        atime;
+       uint64_t        mtime;
+       uint64_t        ctime;
+       uint32_t        atimensec;
+       uint32_t        mtimensec;
+       uint32_t        ctimensec;
+       uint32_t        mode;
+       uint32_t        nlink;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint32_t        rdev;
+       uint32_t        blksize;
+       uint32_t        padding;
+};
+
+struct fuse_kstatfs {
+       uint64_t        blocks;
+       uint64_t        bfree;
+       uint64_t        bavail;
+       uint64_t        files;
+       uint64_t        ffree;
+       uint32_t        bsize;
+       uint32_t        namelen;
+       uint32_t        frsize;
+       uint32_t        padding;
+       uint32_t        spare[6];
+};
+
+struct fuse_file_lock {
+       uint64_t        start;
+       uint64_t        end;
+       uint32_t        type;
+       uint32_t        pid; /* tgid */
+};
+
+/**
+ * Bitmasks for fuse_setattr_in.valid
+ */
+#define FATTR_MODE     (1 << 0)
+#define FATTR_UID      (1 << 1)
+#define FATTR_GID      (1 << 2)
+#define FATTR_SIZE     (1 << 3)
+#define FATTR_ATIME    (1 << 4)
+#define FATTR_MTIME    (1 << 5)
+#define FATTR_FH       (1 << 6)
+#define FATTR_ATIME_NOW        (1 << 7)
+#define FATTR_MTIME_NOW        (1 << 8)
+#define FATTR_LOCKOWNER        (1 << 9)
+#define FATTR_CTIME    (1 << 10)
+
+/**
+ * Flags returned by the OPEN request
+ *
+ * FOPEN_DIRECT_IO: bypass page cache for this open file
+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
+ * FOPEN_CACHE_DIR: allow caching this directory
+ */
+#define FOPEN_DIRECT_IO                (1 << 0)
+#define FOPEN_KEEP_CACHE       (1 << 1)
+#define FOPEN_NONSEEKABLE      (1 << 2)
+#define FOPEN_CACHE_DIR                (1 << 3)
+
+/**
+ * INIT request/reply flags
+ *
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
+ * FUSE_POSIX_ACL: filesystem supports posix acls
+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
+ * FUSE_CACHE_SYMLINKS: cache READLINK responses
+ */
+#define FUSE_ASYNC_READ                (1 << 0)
+#define FUSE_POSIX_LOCKS       (1 << 1)
+#define FUSE_FILE_OPS          (1 << 2)
+#define FUSE_ATOMIC_O_TRUNC    (1 << 3)
+#define FUSE_EXPORT_SUPPORT    (1 << 4)
+#define FUSE_BIG_WRITES                (1 << 5)
+#define FUSE_DONT_MASK         (1 << 6)
+#define FUSE_SPLICE_WRITE      (1 << 7)
+#define FUSE_SPLICE_MOVE       (1 << 8)
+#define FUSE_SPLICE_READ       (1 << 9)
+#define FUSE_FLOCK_LOCKS       (1 << 10)
+#define FUSE_HAS_IOCTL_DIR     (1 << 11)
+#define FUSE_AUTO_INVAL_DATA   (1 << 12)
+#define FUSE_DO_READDIRPLUS    (1 << 13)
+#define FUSE_READDIRPLUS_AUTO  (1 << 14)
+#define FUSE_ASYNC_DIO         (1 << 15)
+#define FUSE_WRITEBACK_CACHE   (1 << 16)
+#define FUSE_NO_OPEN_SUPPORT   (1 << 17)
+#define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_HANDLE_KILLPRIV   (1 << 19)
+#define FUSE_POSIX_ACL         (1 << 20)
+#define FUSE_ABORT_ERROR       (1 << 21)
+#define FUSE_MAX_PAGES         (1 << 22)
+#define FUSE_CACHE_SYMLINKS    (1 << 23)
+
+/**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL        (1 << 0)
+
+/**
+ * Release flags
+ */
+#define FUSE_RELEASE_FLUSH     (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK      (1 << 1)
+
+/**
+ * Getattr flags
+ */
+#define FUSE_GETATTR_FH                (1 << 0)
+
+/**
+ * Lock flags
+ */
+#define FUSE_LK_FLOCK          (1 << 0)
+
+/**
+ * WRITE flags
+ *
+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ */
+#define FUSE_WRITE_CACHE       (1 << 0)
+#define FUSE_WRITE_LOCKOWNER   (1 << 1)
+
+/**
+ * Read flags
+ */
+#define FUSE_READ_LOCKOWNER    (1 << 1)
+
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT      (1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED        (1 << 1)
+#define FUSE_IOCTL_RETRY       (1 << 2)
+#define FUSE_IOCTL_32BIT       (1 << 3)
+#define FUSE_IOCTL_DIR         (1 << 4)
+
+#define FUSE_IOCTL_MAX_IOV     256
+
+/**
+ * Poll flags
+ *
+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
+ */
+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
+
+/*
+ * FUSE operation codes.
+ *
+ * The numbering is not contiguous: 7 and 19 are not assigned in this enum,
+ * and the only CUSE opcode is kept apart at 4096.
+ * NOTE(review): presumably this is the value carried in
+ * fuse_in_header.opcode -- confirm against the IPC code.
+ */
+enum fuse_opcode {
+       FUSE_LOOKUP             = 1,
+       FUSE_FORGET             = 2,  /* no reply */
+       FUSE_GETATTR            = 3,
+       FUSE_SETATTR            = 4,
+       FUSE_READLINK           = 5,
+       FUSE_SYMLINK            = 6,
+       FUSE_MKNOD              = 8,
+       FUSE_MKDIR              = 9,
+       FUSE_UNLINK             = 10,
+       FUSE_RMDIR              = 11,
+       FUSE_RENAME             = 12,
+       FUSE_LINK               = 13,
+       FUSE_OPEN               = 14,
+       FUSE_READ               = 15,
+       FUSE_WRITE              = 16,
+       FUSE_STATFS             = 17,
+       FUSE_RELEASE            = 18,
+       FUSE_FSYNC              = 20,
+       FUSE_SETXATTR           = 21,
+       FUSE_GETXATTR           = 22,
+       FUSE_LISTXATTR          = 23,
+       FUSE_REMOVEXATTR        = 24,
+       FUSE_FLUSH              = 25,
+       FUSE_INIT               = 26,
+       FUSE_OPENDIR            = 27,
+       FUSE_READDIR            = 28,
+       FUSE_RELEASEDIR         = 29,
+       FUSE_FSYNCDIR           = 30,
+       FUSE_GETLK              = 31,
+       FUSE_SETLK              = 32,
+       FUSE_SETLKW             = 33,
+       FUSE_ACCESS             = 34,
+       FUSE_CREATE             = 35,
+       FUSE_INTERRUPT          = 36,
+       FUSE_BMAP               = 37,
+       FUSE_DESTROY            = 38,
+       FUSE_IOCTL              = 39,
+       FUSE_POLL               = 40,
+       FUSE_NOTIFY_REPLY       = 41,
+       FUSE_BATCH_FORGET       = 42,
+       FUSE_FALLOCATE          = 43,
+       FUSE_READDIRPLUS        = 44,
+       FUSE_RENAME2            = 45,
+       FUSE_LSEEK              = 46,
+       FUSE_COPY_FILE_RANGE    = 47,
+
+       /* CUSE specific operations */
+       CUSE_INIT               = 4096,
+};
+
+/*
+ * Notification codes.
+ *
+ * FUSE_NOTIFY_CODE_MAX has no explicit value, so it evaluates to one past the
+ * last real code (7 with the current list) and acts as an end-of-range marker.
+ */
+enum fuse_notify_code {
+       FUSE_NOTIFY_POLL   = 1,
+       FUSE_NOTIFY_INVAL_INODE = 2,
+       FUSE_NOTIFY_INVAL_ENTRY = 3,
+       FUSE_NOTIFY_STORE = 4,
+       FUSE_NOTIFY_RETRIEVE = 5,
+       FUSE_NOTIFY_DELETE = 6,
+       FUSE_NOTIFY_CODE_MAX,
+};
+
+/* The read buffer is required to be at least 8k, but may be much larger */
+#define FUSE_MIN_READ_BUFFER 8192
+
+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
+
+struct fuse_entry_out {
+       uint64_t        nodeid;         /* Inode ID */
+       uint64_t        generation;     /* Inode generation: nodeid:gen must
+                                          be unique for the fs's lifetime */
+       uint64_t        entry_valid;    /* Cache timeout for the name */
+       uint64_t        attr_valid;     /* Cache timeout for the attributes */
+       uint32_t        entry_valid_nsec;
+       uint32_t        attr_valid_nsec;
+       struct fuse_attr attr;
+};
+
+struct fuse_forget_in {
+       uint64_t        nlookup;
+};
+
+struct fuse_forget_one {
+       uint64_t        nodeid;
+       uint64_t        nlookup;
+};
+
+struct fuse_batch_forget_in {
+       uint32_t        count;
+       uint32_t        dummy;
+};
+
+struct fuse_getattr_in {
+       uint32_t        getattr_flags;
+       uint32_t        dummy;
+       uint64_t        fh;
+};
+
+#define FUSE_COMPAT_ATTR_OUT_SIZE 96
+
+struct fuse_attr_out {
+       uint64_t        attr_valid;     /* Cache timeout for the attributes */
+       uint32_t        attr_valid_nsec;
+       uint32_t        dummy;
+       struct fuse_attr attr;
+};
+
+/*
+ * FUSE_COMPAT_MKNOD_IN_SIZE (8) equals the size of the two leading fields of
+ * struct fuse_mknod_in (mode + rdev).
+ * NOTE(review): presumably the layout used by peers that predate the umask
+ * and padding fields -- confirm against the protocol history.
+ */
+#define FUSE_COMPAT_MKNOD_IN_SIZE 8
+
+struct fuse_mknod_in {
+       uint32_t        mode;
+       uint32_t        rdev;
+       uint32_t        umask;
+       uint32_t        padding;
+};
+
+struct fuse_mkdir_in {
+       uint32_t        mode;
+       uint32_t        umask;
+};
+
+struct fuse_rename_in {
+       uint64_t        newdir;
+};
+
+struct fuse_rename2_in {
+       uint64_t        newdir;
+       uint32_t        flags;
+       uint32_t        padding;
+};
+
+struct fuse_link_in {
+       uint64_t        oldnodeid;
+};
+
+struct fuse_setattr_in {
+       uint32_t        valid;
+       uint32_t        padding;
+       uint64_t        fh;
+       uint64_t        size;
+       uint64_t        lock_owner;
+       uint64_t        atime;
+       uint64_t        mtime;
+       uint64_t        ctime;
+       uint32_t        atimensec;
+       uint32_t        mtimensec;
+       uint32_t        ctimensec;
+       uint32_t        mode;
+       uint32_t        unused4;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint32_t        unused5;
+};
+
+struct fuse_open_in {
+       uint32_t        flags;
+       uint32_t        unused;
+};
+
+struct fuse_create_in {
+       uint32_t        flags;
+       uint32_t        mode;
+       uint32_t        umask;
+       uint32_t        padding;
+};
+
+struct fuse_open_out {
+       uint64_t        fh;
+       uint32_t        open_flags;
+       uint32_t        padding;
+};
+
+struct fuse_release_in {
+       uint64_t        fh;
+       uint32_t        flags;
+       uint32_t        release_flags;
+       uint64_t        lock_owner;
+};
+
+struct fuse_flush_in {
+       uint64_t        fh;
+       uint32_t        unused;
+       uint32_t        padding;
+       uint64_t        lock_owner;
+};
+
+struct fuse_read_in {
+       uint64_t        fh;
+       uint64_t        offset;
+       uint32_t        size;
+       uint32_t        read_flags;
+       uint64_t        lock_owner;
+       uint32_t        flags;
+       uint32_t        padding;
+};
+
+/*
+ * FUSE_COMPAT_WRITE_IN_SIZE (24) equals the size of the first four fields of
+ * struct fuse_write_in (fh + offset + size + write_flags); the full structure
+ * is 40 bytes.
+ * NOTE(review): presumably the layout used by peers that predate lock_owner
+ * and flags -- confirm against the protocol history.
+ */
+#define FUSE_COMPAT_WRITE_IN_SIZE 24
+
+struct fuse_write_in {
+       uint64_t        fh;
+       uint64_t        offset;
+       uint32_t        size;
+       uint32_t        write_flags;
+       uint64_t        lock_owner;
+       uint32_t        flags;
+       uint32_t        padding;
+};
+
+struct fuse_write_out {
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+#define FUSE_COMPAT_STATFS_SIZE 48
+
+struct fuse_statfs_out {
+       struct fuse_kstatfs st;
+};
+
+struct fuse_fsync_in {
+       uint64_t        fh;
+       uint32_t        fsync_flags;
+       uint32_t        padding;
+};
+
+struct fuse_setxattr_in {
+       uint32_t        size;
+       uint32_t        flags;
+};
+
+struct fuse_getxattr_in {
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+struct fuse_getxattr_out {
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+struct fuse_lk_in {
+       uint64_t        fh;
+       uint64_t        owner;
+       struct fuse_file_lock lk;
+       uint32_t        lk_flags;
+       uint32_t        padding;
+};
+
+struct fuse_lk_out {
+       struct fuse_file_lock lk;
+};
+
+struct fuse_access_in {
+       uint32_t        mask;
+       uint32_t        padding;
+};
+
+struct fuse_init_in {
+       uint32_t        major;
+       uint32_t        minor;
+       uint32_t        max_readahead;
+       uint32_t        flags;
+};
+
+/*
+ * Truncated sizes of struct fuse_init_out:
+ *  - FUSE_COMPAT_INIT_OUT_SIZE (8) covers major + minor only.
+ *  - FUSE_COMPAT_22_INIT_OUT_SIZE (24) covers everything up to and including
+ *    max_write, i.e. it stops right before time_gran.
+ * NOTE(review): presumably these are the reply sizes of older protocol
+ * revisions -- confirm against the protocol history.
+ */
+#define FUSE_COMPAT_INIT_OUT_SIZE 8
+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
+
+struct fuse_init_out {
+       uint32_t        major;
+       uint32_t        minor;
+       uint32_t        max_readahead;
+       uint32_t        flags;
+       uint16_t        max_background;
+       uint16_t        congestion_threshold;
+       uint32_t        max_write;
+       uint32_t        time_gran;
+       uint16_t        max_pages;
+       uint16_t        padding;
+       uint32_t        unused[8];
+};
+
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+       uint32_t        major;
+       uint32_t        minor;
+       uint32_t        unused;
+       uint32_t        flags;
+};
+
+struct cuse_init_out {
+       uint32_t        major;
+       uint32_t        minor;
+       uint32_t        unused;
+       uint32_t        flags;
+       uint32_t        max_read;
+       uint32_t        max_write;
+       uint32_t        dev_major;              /* chardev major */
+       uint32_t        dev_minor;              /* chardev minor */
+       uint32_t        spare[10];
+};
+
+struct fuse_interrupt_in {
+       uint64_t        unique;
+};
+
+struct fuse_bmap_in {
+       uint64_t        block;
+       uint32_t        blocksize;
+       uint32_t        padding;
+};
+
+struct fuse_bmap_out {
+       uint64_t        block;
+};
+
+struct fuse_ioctl_in {
+       uint64_t        fh;
+       uint32_t        flags;
+       uint32_t        cmd;
+       uint64_t        arg;
+       uint32_t        in_size;
+       uint32_t        out_size;
+};
+
+struct fuse_ioctl_iovec {
+       uint64_t        base;
+       uint64_t        len;
+};
+
+struct fuse_ioctl_out {
+       int32_t         result;
+       uint32_t        flags;
+       uint32_t        in_iovs;
+       uint32_t        out_iovs;
+};
+
+struct fuse_poll_in {
+       uint64_t        fh;
+       uint64_t        kh;
+       uint32_t        flags;
+       uint32_t        events;
+};
+
+struct fuse_poll_out {
+       uint32_t        revents;
+       uint32_t        padding;
+};
+
+struct fuse_notify_poll_wakeup_out {
+       uint64_t        kh;
+};
+
+struct fuse_fallocate_in {
+       uint64_t        fh;
+       uint64_t        offset;
+       uint64_t        length;
+       uint32_t        mode;
+       uint32_t        padding;
+};
+
+/*
+ * Fixed-size header of a request (40 bytes; all fields are naturally aligned,
+ * so there is no implicit padding).
+ * NOTE(review): presumably `len` is the total request length including this
+ * header and `unique` is echoed back in fuse_out_header.unique -- confirm.
+ */
+struct fuse_in_header {
+       uint32_t        len;
+       uint32_t        opcode;
+       uint64_t        unique;
+       uint64_t        nodeid;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint32_t        pid;
+       uint32_t        padding;
+};
+
+/*
+ * Fixed-size header of a reply (16 bytes).
+ * NOTE(review): presumably `error` is zero or a negated errno value and
+ * `unique` identifies the request being answered -- confirm against the
+ * code that consumes replies.
+ */
+struct fuse_out_header {
+       uint32_t        len;
+       int32_t         error;
+       uint64_t        unique;
+};
+
+/*
+ * Directory entry record: a fixed 24-byte head followed by `namelen` bytes of
+ * name stored in the flexible array member.
+ */
+struct fuse_dirent {
+       uint64_t        ino;
+       uint64_t        off;
+       uint32_t        namelen;
+       uint32_t        type;
+       char name[];
+};
+
+/*
+ * FUSE_NAME_OFFSET:   byte offset of `name` inside struct fuse_dirent.
+ * FUSE_DIRENT_ALIGN:  rounds x up to the next multiple of sizeof(uint64_t).
+ * FUSE_DIRENT_SIZE:   size of the record pointed to by d (head plus name),
+ *                     rounded up with FUSE_DIRENT_ALIGN.
+ */
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+       (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+#define FUSE_DIRENT_SIZE(d) \
+       FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+/*
+ * Directory entry record prefixed with a struct fuse_entry_out.  Because
+ * `dirent` ends in a flexible array member it has to stay the last field.
+ */
+struct fuse_direntplus {
+       struct fuse_entry_out entry_out;
+       struct fuse_dirent dirent;
+};
+
+/*
+ * FUSE_NAME_OFFSET_DIRENTPLUS: byte offset of the name inside
+ *                              struct fuse_direntplus.
+ * FUSE_DIRENTPLUS_SIZE:        size of the record pointed to by d, rounded up
+ *                              with FUSE_DIRENT_ALIGN.
+ */
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+       offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+       FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
+struct fuse_notify_inval_inode_out {
+       uint64_t        ino;
+       int64_t         off;
+       int64_t         len;
+};
+
+struct fuse_notify_inval_entry_out {
+       uint64_t        parent;
+       uint32_t        namelen;
+       uint32_t        padding;
+};
+
+struct fuse_notify_delete_out {
+       uint64_t        parent;
+       uint64_t        child;
+       uint32_t        namelen;
+       uint32_t        padding;
+};
+
+struct fuse_notify_store_out {
+       uint64_t        nodeid;
+       uint64_t        offset;
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+struct fuse_notify_retrieve_out {
+       uint64_t        notify_unique;
+       uint64_t        nodeid;
+       uint64_t        offset;
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+/*
+ * Matches the size of fuse_write_in: both structures are 40 bytes, and
+ * `offset` and `size` sit at the same byte offsets (8 and 16) in each.
+ */
+struct fuse_notify_retrieve_in {
+       uint64_t        dummy1;
+       uint64_t        offset;
+       uint32_t        size;
+       uint32_t        dummy2;
+       uint64_t        dummy3;
+       uint64_t        dummy4;
+};
+
+/* Device ioctls: */
+#define FUSE_DEV_IOC_CLONE     _IOR(229, 0, uint32_t)
+
+struct fuse_lseek_in {
+       uint64_t        fh;
+       uint64_t        offset;
+       uint32_t        whence;
+       uint32_t        padding;
+};
+
+struct fuse_lseek_out {
+       uint64_t        offset;
+};
+
+struct fuse_copy_file_range_in {
+       uint64_t        fh_in;
+       uint64_t        off_in;
+       uint64_t        nodeid_out;
+       uint64_t        fh_out;
+       uint64_t        off_out;
+       uint64_t        len;
+       uint64_t        flags;
+};
+
+#endif /* _LINUX_FUSE_H */
diff --git a/sys/vfs/fuse/fuse_debug.h b/sys/vfs/fuse/fuse_debug.h
new file mode 100644 (file)
index 0000000..e583e33
--- /dev/null
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FUSE_DEBUG_H
+#define FUSE_DEBUG_H
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include "fuse_abi.h"
+
+#if 1
+#define FUSE_CMDNAME (curproc ? curproc->p_comm : "...")
+#define FUSE_CMDPID (curproc ? curproc->p_pid : -1)
+#define fuse_print(X, ...)     \
+       kprintf("%s(%s|%d): " X, __func__, FUSE_CMDNAME, FUSE_CMDPID, ## __VA_ARGS__)
+#define fuse_panic(X, ...)     \
+       panic("%s(%s|%d): " X, __func__, FUSE_CMDNAME, FUSE_CMDPID, ## __VA_ARGS__)
+#define fuse_dbg(X, ...)       if (fuse_debug) \
+       kprintf("### %s(%s|%d): " X, __func__, FUSE_CMDNAME, FUSE_CMDPID, ## __VA_ARGS__)
+#define fuse_dbgipc(fip, error, msg) do {                                      \
+       struct fuse_in_header *ihd = fuse_in(fip);                              \
+       fuse_dbg("fip=%p ino=%ju op=%s len=%u error=%d %s\n",                   \
+           fip, ihd->nodeid, fuse_get_ops(ihd->opcode), ihd->len, error, msg); \
+       } while (0)
+#else
+#define fuse_print(X, ...) kprintf(X, ## __VA_ARGS__)
+#define fuse_panic(X, ...) panic(X, ## __VA_ARGS__)
+#define fuse_dbg(X, ...) do { } while (0)
+#define fuse_dbgipc(fip, error, msg) do { } while (0)
+#endif
+
+#endif /* FUSE_DEBUG_H */
diff --git a/sys/vfs/fuse/fuse_device.c b/sys/vfs/fuse/fuse_device.c
new file mode 100644 (file)
index 0000000..5efc311
--- /dev/null
@@ -0,0 +1,320 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+#include <sys/conf.h>
+#include <sys/device.h>
+#include <sys/devfs.h>
+#include <sys/uio.h>
+
+/* Defined below; declared early because the cdevpriv destructor calls it. */
+static int fuse_cdevpriv_close(struct fuse_mount*);
+/* The FUSE device node; created in fuse_device_init(), destroyed in fuse_device_cleanup(). */
+static struct cdev *fuse_dev;
+
+/*
+ * Destructor for the per-open private data (the struct fuse_mount registered
+ * with devfs_set_cdevpriv() in fuse_device_open()).
+ *
+ * Shuts the mount down via fuse_cdevpriv_close() and, only if that succeeded,
+ * drops the mount with fuse_mount_free().
+ *
+ * NOTE(review): when fuse_cdevpriv_close() fails (the device was never
+ * associated with a mount) nothing releases fmp here, which looks like a leak
+ * of the allocation made in fuse_device_open() -- confirm.
+ */
+static void
+fuse_cdevpriv_dtor(void *data)
+{
+       struct fuse_mount *fmp = data;
+       int error;
+
+       error = fuse_cdevpriv_close(fmp);
+       if (error)
+               return;
+
+       fuse_mount_free(fmp);
+}
+
+/*
+ * open(2) handler for the FUSE device.
+ *
+ * Allocates a zeroed struct fuse_mount holding one reference and attaches it
+ * to the opened file as cdevpriv data, with fuse_cdevpriv_dtor() as its
+ * destructor.
+ *
+ * Returns 0 on success.  If the private data cannot be attached, the freshly
+ * allocated mount is released again and the error from devfs_set_cdevpriv()
+ * is returned, instead of leaking the allocation and reporting success.
+ */
+static int
+fuse_device_open(struct dev_open_args *ap)
+{
+       struct fuse_mount *fmp;
+       int error;
+
+       fmp = kmalloc(sizeof(*fmp), M_TEMP, M_WAITOK | M_ZERO);
+       KKASSERT(fmp);
+
+       refcount_init(&fmp->refcnt, 1);
+       error = devfs_set_cdevpriv(ap->a_fp, fmp, fuse_cdevpriv_dtor);
+       if (error) {
+               /* Nothing else references fmp yet; release it directly. */
+               kfree(fmp, M_TEMP);
+               return error;
+       }
+       fuse_dbg("open %s\n", ap->a_head.a_dev->si_name);
+
+       return 0;
+}
+
+/*
+ * close(2) handler for the FUSE device.
+ *
+ * Only looks up the private data and logs; returns the lookup error, or 0.
+ */
+static int
+fuse_device_close(struct dev_close_args *ap)
+{
+       struct fuse_mount *fmp;
+       int error = devfs_get_cdevpriv(ap->a_fp, (void**)&fmp);
+
+       if (error)
+               return error;
+       KKASSERT(fmp);
+
+       /*
+        * XXX Can't call fuse_cdevpriv_close(fmp) here on device close due to
+        * a devfs bug; it is invoked from the cdevpriv destructor instead.
+        */
+       fuse_dbg("close %s\n", ap->a_head.a_dev->si_name);
+
+       return 0;
+}
+
+/*
+ * Tear down the mount attached to an open of the FUSE device.
+ *
+ * Returns ENODEV (after logging) when fmp has no device vnode, i.e. the open
+ * was never associated with a FUSE mount.  Otherwise calls fuse_mount_kill()
+ * under mnt_lock, posts a knote on the mount's kqueue list when that call
+ * returns -1, and returns 0.
+ */
+static int
+fuse_cdevpriv_close(struct fuse_mount *fmp)
+{
+       if (fmp->devvp) {
+               mtx_lock(&fmp->mnt_lock);
+               if (fuse_mount_kill(fmp) == -1) {
+                       KNOTE(&fmp->kq.ki_note, 0);
+               }
+               KKASSERT(fmp->devvp);
+               mtx_unlock(&fmp->mnt_lock);
+
+               return 0;
+       }
+
+       fuse_print("/dev/%s not associated with FUSE mount\n",
+           fuse_dev->si_name);
+
+       return ENODEV;
+}
+
+/*
+ * Empty both IPC queues of a mount.  Call with ->ipc_lock held.
+ *
+ * Entries on the request queue are simply unlinked.  Entries on the reply
+ * queue are unlinked and, when fuse_ipc_test_and_set_replied() returns
+ * non-zero, a wakeup() is issued on the IPC.
+ */
+static void
+fuse_device_clear(struct fuse_mount *fmp)
+{
+       struct fuse_ipc *ipc;
+
+       for (ipc = TAILQ_FIRST(&fmp->request_head); ipc;
+           ipc = TAILQ_FIRST(&fmp->request_head)) {
+               TAILQ_REMOVE(&fmp->request_head, ipc, request_entry);
+       }
+
+       for (ipc = TAILQ_FIRST(&fmp->reply_head); ipc;
+           ipc = TAILQ_FIRST(&fmp->reply_head)) {
+               TAILQ_REMOVE(&fmp->reply_head, ipc, reply_entry);
+               if (!fuse_ipc_test_and_set_replied(ipc))
+                       continue;
+               wakeup(ipc);
+       }
+}
+
+/*
+ * read(2) handler for the FUSE device: hands the oldest queued request to the
+ * caller.
+ *
+ * Sleeps (interruptibly) on the mount until a request is queued.  Returns:
+ *  - the devfs_get_cdevpriv() error if no private data is attached,
+ *  - ENOTCONN if the mount is dead (both queues are cleared when this is
+ *    detected after a sleep),
+ *  - the mtxsleep() error if the sleep was interrupted,
+ *  - EILSEQ if the caller's buffer is smaller than the request,
+ *  - otherwise the result of copying the request out with uiomove().
+ *
+ * The buffer size is checked while the request is still on the queue, so a
+ * too-small read no longer discards the request; a later read with a large
+ * enough buffer still receives it.
+ */
+static int
+fuse_device_read(struct dev_read_args *ap)
+{
+       struct uio *uio = ap->a_uio;
+       struct fuse_mount *fmp;
+       struct fuse_ipc *fip;
+       int error;
+
+       error = devfs_get_cdevpriv(ap->a_fp, (void**)&fmp);
+       if (error)
+               return error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       mtx_lock(&fmp->ipc_lock);
+       while (!(fip = TAILQ_FIRST(&fmp->request_head))) {
+               error = mtxsleep(fmp, &fmp->ipc_lock, PCATCH, "ftxc", 0);
+               /* Check for a dead mount first, whatever woke us up. */
+               if (fuse_test_dead(fmp)) {
+                       fuse_device_clear(fmp);
+                       mtx_unlock(&fmp->ipc_lock);
+                       fuse_dbg("error=%d dead\n", error);
+                       return ENOTCONN;
+               }
+               if (error) {
+                       mtx_unlock(&fmp->ipc_lock);
+                       fuse_dbg("error=%d\n", error);
+                       return error;
+               }
+       }
+
+       /* Reject short buffers before unlinking, so the request is not lost. */
+       if (uio->uio_resid < fuse_in_size(fip)) {
+               mtx_unlock(&fmp->ipc_lock);
+               return EILSEQ;
+       }
+       TAILQ_REMOVE(&fmp->request_head, fip, request_entry);
+       mtx_unlock(&fmp->ipc_lock);
+
+       fuse_dbgipc(fip, 0, "");
+
+       return uiomove(fuse_in(fip), fuse_in_size(fip), uio);
+}
+
+/*
+ * write(2) handler for the FUSE device: accepts one reply and completes the
+ * matching pending IPC.
+ *
+ * The whole write is copied into a fuse_buf and interpreted as a
+ * struct fuse_out_header followed by payload.  The IPC whose `unique` equals
+ * the header's is unlinked from the reply queue and receives the buffer.
+ *
+ * Returns:
+ *  - the devfs_get_cdevpriv() error if no private data is attached,
+ *  - EILSEQ if fewer bytes than an out header were written,
+ *  - the uiomove() error if the copy-in failed,
+ *  - ENOMSG if no pending IPC matches,
+ *  - EPROTO if the reply reports success but fuse_audit_length() rejects it,
+ *  - 0 otherwise.
+ * A matched IPC is completed in every case, including EPROTO.
+ */
+static int
+fuse_device_write(struct dev_write_args *ap)
+{
+       struct uio *uio = ap->a_uio;
+       struct fuse_mount *fmp;
+       struct fuse_ipc *fip, *next;
+       struct fuse_ipc *match = NULL;
+       struct fuse_buf fb;
+       struct fuse_in_header *ihd;
+       struct fuse_out_header *ohd;
+       int error;
+
+       error = devfs_get_cdevpriv(ap->a_fp, (void**)&fmp);
+       if (error)
+               return error;
+
+       /* A reply has to carry at least a complete out header. */
+       if (uio->uio_resid < sizeof(*ohd))
+               return EILSEQ;
+
+       /* Pull the entire reply into a kernel buffer. */
+       fuse_buf_alloc(&fb, uio->uio_resid);
+       error = uiomove(fb.buf, uio->uio_resid, uio);
+       if (error) {
+               fuse_buf_free(&fb);
+               return error;
+       }
+       ohd = fb.buf;
+
+       /* Look up and unlink the pending IPC this reply answers. */
+       mtx_lock(&fmp->ipc_lock);
+       TAILQ_FOREACH_MUTABLE(fip, &fmp->reply_head, reply_entry, next) {
+               if (fip->unique != ohd->unique)
+                       continue;
+               TAILQ_REMOVE(&fmp->reply_head, fip, reply_entry);
+               match = fip;
+               break;
+       }
+       mtx_unlock(&fmp->ipc_lock);
+
+       if (match == NULL) {
+               fuse_dbg("unique=%ju not found\n", ohd->unique);
+               fuse_buf_free(&fb);
+               return ENOMSG;
+       }
+
+       /* From here on the buffer belongs to the IPC. */
+       match->reply = fb;
+       ihd = fuse_in(match);
+
+       /* Non zero ohd->error is not /dev/fuse write error. */
+       if (ohd->error == -ENOSYS) {
+               fuse_set_nosys(fmp, ihd->opcode);
+               fuse_dbgipc(match, ohd->error, "ENOSYS");
+       } else if (ohd->error == 0 && fuse_audit_length(ihd, ohd)) {
+               error = EPROTO;
+               fuse_dbgipc(match, error, "audit");
+       } else {
+               fuse_dbgipc(match, 0, "");
+       }
+
+       /* Complete the IPC regardless of above result. */
+       if (fuse_ipc_test_and_set_replied(match))
+               wakeup(match);
+
+       return error;
+}
+
+/* kqueue filter callbacks, defined further down. */
+static void filt_fusedevdetach(struct knote*);
+static int filt_fusedevread(struct knote*, long);
+static int filt_fusedevwrite(struct knote*, long);
+
+/*
+ * Filter tables installed by fuse_device_kqfilter() for EVFILT_READ and
+ * EVFILT_WRITE.  Both share the detach routine and have no attach routine;
+ * they differ only in the event callback.
+ */
+static struct filterops fusedevread_filterops =
+       { FILTEROP_ISFD,
+         NULL, filt_fusedevdetach, filt_fusedevread };
+static struct filterops fusedevwrite_filterops =
+       { FILTEROP_ISFD,
+         NULL, filt_fusedevdetach, filt_fusedevwrite };
+
+/*
+ * kqueue attach handler for the FUSE device.
+ *
+ * Supports EVFILT_READ and EVFILT_WRITE: the knote gets the matching filter
+ * table, the mount as its hook, and is inserted into the mount's knote list.
+ *
+ * The function itself always returns 0; the outcome is reported through
+ * ap->a_result (0, the devfs_get_cdevpriv() error, or EOPNOTSUPP for any
+ * other filter).
+ */
+static int
+fuse_device_kqfilter(struct dev_kqfilter_args *ap)
+{
+       struct knote *kn = ap->a_kn;
+       struct filterops *fops;
+       struct fuse_mount *fmp;
+       int error;
+
+       error = devfs_get_cdevpriv(ap->a_fp, (void**)&fmp);
+       if (error) {
+               ap->a_result = error;
+               return 0;
+       }
+
+       if (kn->kn_filter == EVFILT_READ) {
+               fops = &fusedevread_filterops;
+       } else if (kn->kn_filter == EVFILT_WRITE) {
+               fops = &fusedevwrite_filterops;
+       } else {
+               ap->a_result = EOPNOTSUPP;
+               return 0;
+       }
+
+       ap->a_result = 0;
+       kn->kn_fop = fops;
+       kn->kn_hook = (caddr_t)fmp;
+       knote_insert(&fmp->kq.ki_note, kn);
+
+       return 0;
+}
+
+/*
+ * Detach callback shared by both filters: removes the knote from the knote
+ * list of the mount stored in its hook.
+ */
+static void
+filt_fusedevdetach(struct knote *kn)
+{
+       struct fuse_mount *fmp;
+
+       fmp = (struct fuse_mount*)kn->kn_hook;
+       knote_remove(&fmp->kq.ki_note, kn);
+}
+
+/*
+ * EVFILT_READ event callback: returns 1 when at least one request is queued
+ * on the mount (checked under ipc_lock), 0 otherwise.  `hint` is unused.
+ */
+static int
+filt_fusedevread(struct knote *kn, long hint)
+{
+       struct fuse_mount *fmp = (struct fuse_mount*)kn->kn_hook;
+       int pending;
+
+       mtx_lock(&fmp->ipc_lock);
+       pending = TAILQ_EMPTY(&fmp->request_head) ? 0 : 1;
+       mtx_unlock(&fmp->ipc_lock);
+
+       return pending;
+}
+
+/*
+ * EVFILT_WRITE event callback: unconditionally returns 1, i.e. the device is
+ * always reported as writable.  Neither argument is used.
+ */
+static int
+filt_fusedevwrite(struct knote *kn, long hint)
+{
+       return 1;
+}
+
+/*
+ * Device switch for the "fuse" character device, flagged D_MPSAFE.  Only
+ * open, close, read, write and kqfilter are provided; no other entry point
+ * is set here.
+ */
+static struct dev_ops fuse_device_cdevsw = {
+       { "fuse", 0, D_MPSAFE, },
+       .d_open = fuse_device_open,
+       .d_close = fuse_device_close,
+       .d_read = fuse_device_read,
+       .d_write = fuse_device_write,
+       .d_kqfilter = fuse_device_kqfilter,
+};
+
+/*
+ * Create the "fuse" device node (minor 0, owner root, group operator,
+ * read/write for owner and group) and remember it in fuse_dev.
+ *
+ * Returns 0 on success, ENOMEM if make_dev() yields no device.
+ */
+int
+fuse_device_init(void)
+{
+       fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR,
+           S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse");
+
+       return fuse_dev ? 0 : ENOMEM;
+}
+
+/*
+ * Destroy the device node created by fuse_device_init().  Asserts that the
+ * node exists, so it must only be called after a successful init.
+ */
+void
+fuse_device_cleanup(void)
+{
+       KKASSERT(fuse_dev);
+       destroy_dev(fuse_dev);
+}
diff --git a/sys/vfs/fuse/fuse_file.c b/sys/vfs/fuse/fuse_file.c
new file mode 100644 (file)
index 0000000..3c9eda7
--- /dev/null
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+/* Malloc type backing the per-file handle objects. */
+static MALLOC_DEFINE(M_FUSE_FH, "fuse_fh", "FUSE fh");
+
+/*
+ * Object cache of uint64_t file handles; created in fuse_file_init() and
+ * destroyed in fuse_file_cleanup().  Each object is sizeof(uint64_t) bytes
+ * and is allocated from M_FUSE_FH.
+ */
+static struct objcache *fuse_fh_objcache = NULL;
+static struct objcache_malloc_args fuse_fh_args = {
+       sizeof(uint64_t), M_FUSE_FH,
+};
+
+/*
+ * Return the FUSE file handle stored in fp->private_data.  Asserts that a
+ * handle has been attached with fuse_get_fh().
+ */
+uint64_t
+fuse_fh(struct file *fp)
+{
+       uint64_t *handle;
+
+       handle = fp->private_data;
+       KKASSERT(handle);
+
+       fuse_dbg("fh=%jx\n", *handle);
+
+       return *handle;
+}
+
+/*
+ * Attach FUSE file handle `fh` to `fp`: takes an object from the handle
+ * cache, stores fh in it and hangs it off fp->private_data.  Asserts that fp
+ * has no private data yet.
+ */
+void
+fuse_get_fh(struct file *fp, uint64_t fh)
+{
+       uint64_t *handle;
+
+       handle = objcache_get(fuse_fh_objcache, M_WAITOK);
+       KKASSERT(handle);
+
+       *handle = fh;
+       fuse_dbg("fh=%jx\n", *handle);
+
+       KKASSERT(!fp->private_data);
+       fp->private_data = handle;
+}
+
+/*
+ * Detach the FUSE file handle from `fp`: returns the object to the handle
+ * cache and clears fp->private_data.  Asserts that a handle is attached.
+ */
+void
+fuse_put_fh(struct file *fp)
+{
+       uint64_t *handle;
+
+       handle = fp->private_data;
+       KKASSERT(handle);
+
+       fuse_dbg("fh=%jx\n", *handle);
+
+       objcache_put(fuse_fh_objcache, handle);
+       fp->private_data = NULL;
+}
+
+/*
+ * nfh - per node fh (ad-hoc hack)
+ *
+ * XXX This should go away, since the very concept of a per node fh is wrong.
+ * It only exists because of how BSD VFS is implemented: there are situations
+ * where FUSE VOP's can't access the fh required by FUSE ops.
+ */
+
+/* Return the file handle recorded in the node. */
+uint64_t
+fuse_nfh(struct fuse_node *fnp)
+{
+       fuse_dbg("ino=%ju fh=%jx\n", fnp->ino, fnp->fh);
+
+       return fnp->fh;
+}
+
+/* Record file handle `fh` in the node, overwriting any previous value. */
+void
+fuse_get_nfh(struct fuse_node *fnp, uint64_t fh)
+{
+       fnp->fh = fh;
+
+       fuse_dbg("ino=%ju fh=%jx\n", fnp->ino, fnp->fh);
+}
+
+/*
+ * Counterpart of fuse_get_nfh().  It only logs; the handle stored in the node
+ * is left untouched.
+ */
+void
+fuse_put_nfh(struct fuse_node *fnp)
+{
+       fuse_dbg("ino=%ju fh=%jx\n", fnp->ino, fnp->fh);
+}
+
+/*
+ * Create the object cache used for per-file handles.  Objects come from the
+ * malloc-backed allocator described by fuse_fh_args and are zeroed on
+ * allocation; no constructor or destructor is installed.
+ */
+void
+fuse_file_init(void)
+{
+       fuse_fh_objcache = objcache_create("fuse_fh", 0, 0,
+           NULL, NULL, NULL,
+           objcache_malloc_alloc_zero, objcache_malloc_free, &fuse_fh_args);
+}
+
+/* Destroy the object cache created by fuse_file_init(). */
+void
+fuse_file_cleanup(void)
+{
+       objcache_destroy(fuse_fh_objcache);
+}
diff --git a/sys/vfs/fuse/fuse_io.c b/sys/vfs/fuse/fuse_io.c
new file mode 100644 (file)
index 0000000..6ac6632
--- /dev/null
@@ -0,0 +1,346 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+#include <sys/uio.h>
+#include <sys/buf2.h>
+
+/*
+ * Release bp after flagging it B_INVAL | B_RELBUF, so that its contents
+ * are not kept around as valid cached data.
+ */
+static void
+fuse_brelse(struct buf *bp)
+{
+       bp->b_flags |= B_INVAL;
+       bp->b_flags |= B_RELBUF;
+
+       brelse(bp);
+}
+
+/*
+ * Roll the node size back to oldsize when fixsize is set; otherwise do
+ * nothing.  The result of the truncate is intentionally not reported.
+ */
+static void
+fuse_fix_size(struct fuse_node *fnp, bool fixsize, size_t oldsize)
+{
+       if (!fixsize)
+               return;
+
+       fuse_node_truncate(fnp, fnp->size, oldsize);
+}
+
+/*
+ * VOP_READ implementation.
+ *
+ * Data is read one FUSE_BLKSIZE-aligned block at a time through the buffer
+ * cache.  A block that is not validly cached is fetched from the FUSE
+ * server with FUSE_READ and copied into the buffer before being moved to
+ * the caller's uio.  Reading stops at the node size.
+ *
+ * When there is no current process or the node has been marked closed, the
+ * vnode is temporarily re-opened for the duration of each request and the
+ * per-node fh is used (XXX hack).  The temporary file is dropped on every
+ * exit path, including failures.
+ *
+ * Returns 0 on success, EWOULDBLOCK when IO_NRDELAY is set and the block
+ * is not cached, EIO when the server replies with more data than was
+ * requested, or the error of the failing step.
+ */
+int
+fuse_read(struct vop_read_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       bool need_reopen = !curproc || fnp->closed; /* XXX */
+       int error = 0;
+
+       while (uio->uio_resid > 0 && uio->uio_offset < fnp->size) {
+               struct file *fp = NULL; /* non-NULL only while reopened */
+               struct buf *bp;
+               struct fuse_ipc *fip;
+               struct fuse_read_in *fri;
+               off_t base_offset, buf_offset;
+               size_t len, out_size;
+               uint64_t fh;
+
+               /* Prefer the fh of the caller's file over the per-node fh. */
+               fh = fuse_nfh(VTOI(vp));
+               if (ap->a_fp)
+                       fh = fuse_fh(ap->a_fp);
+
+               buf_offset = (off_t)uio->uio_offset & FUSE_BLKMASK64;
+               base_offset = (off_t)uio->uio_offset - buf_offset;
+
+               fuse_dbg("uio_offset=%ju uio_resid=%ju base_offset=%ju "
+                   "buf_offset=%ju\n",
+                   uio->uio_offset, uio->uio_resid, base_offset, buf_offset);
+
+               bp = getblk(vp, base_offset, FUSE_BLKSIZE, 0, 0);
+               KKASSERT(bp);
+               if ((bp->b_flags & (B_INVAL | B_CACHE | B_RAM)) == B_CACHE) {
+                       /* Valid cached block: no round trip to the server. */
+                       bp->b_flags &= ~B_AGE;
+                       goto skip;
+               }
+               if (ap->a_ioflag & IO_NRDELAY) {
+                       bqrelse(bp);
+                       return EWOULDBLOCK;
+               }
+
+               error = breadnx(vp, base_offset, FUSE_BLKSIZE, B_NOTMETA, NULL,
+                   NULL, 0, &bp);
+               KKASSERT(!error);
+
+               fuse_dbg("b_loffset=%ju b_bcount=%d b_flags=%x\n",
+                   bp->b_loffset, bp->b_bcount, bp->b_flags);
+
+               if (need_reopen) {
+                       error = falloc(NULL, &fp, NULL);
+                       if (error) {
+                               fuse_brelse(bp);
+                               break;
+                       }
+                       error = VOP_OPEN(vp, FREAD | FWRITE, ap->a_cred, fp);
+                       if (error) {
+                               /* Don't leak the file allocated above. */
+                               fdrop(fp);
+                               fuse_brelse(bp);
+                               break;
+                       }
+               }
+
+               fip = fuse_ipc_get(fmp, sizeof(*fri));
+               fri = fuse_ipc_fill(fip, FUSE_READ, fnp->ino, ap->a_cred);
+               fri->offset = bp->b_loffset;
+               fri->size = bp->b_bcount;
+               if (need_reopen)
+                       fri->fh = fuse_nfh(VTOI(vp));
+               else
+                       fri->fh = fh;
+
+               fuse_dbg("fuse_read_in offset=%ju size=%u fh=%jx\n",
+                   fri->offset, fri->size, fri->fh);
+
+               error = fuse_ipc_tx(fip);
+               if (error) {
+                       /* fuse_ipc_tx() has already released fip. */
+                       if (fp)
+                               fdrop(fp);
+                       fuse_brelse(bp);
+                       break;
+               }
+
+               /*
+                * The payload comes from the userspace server; never copy
+                * more than the buffer can hold.
+                */
+               out_size = fuse_out_data_size(fip);
+               if (out_size > (size_t)bp->b_bcount) {
+                       fuse_ipc_put(fip);
+                       if (fp)
+                               fdrop(fp);
+                       fuse_brelse(bp);
+                       error = EIO;
+                       break;
+               }
+               memcpy(bp->b_data, fuse_out_data(fip), out_size);
+               fuse_ipc_put(fip);
+
+               if (need_reopen) {
+                       error = fdrop(fp); /* calls VOP_CLOSE() */
+                       if (error) {
+                               fuse_brelse(bp);
+                               break;
+                       }
+               }
+skip:
+               /* Clamp to the block, the request and the node size. */
+               len = FUSE_BLKSIZE - buf_offset;
+               if (len > uio->uio_resid)
+                       len = uio->uio_resid;
+               if (uio->uio_offset + len > fnp->size)
+                       len = (size_t)(fnp->size - uio->uio_offset);
+               fuse_dbg("size=%ju len=%ju\n", fnp->size, len);
+
+               error = uiomovebp(bp, bp->b_data + buf_offset, len, uio);
+               bqrelse(bp);
+               if (error)
+                       break;
+       }
+
+       fuse_dbg("uio_offset=%ju uio_resid=%ju error=%d done\n",
+           uio->uio_offset, uio->uio_resid, error);
+
+       return error;
+}
+
+/* VOP_WRITE entry point; all writes currently go through direct I/O. */
+int
+fuse_write(struct vop_write_args *ap)
+{
+       int error;
+
+       error = fuse_dio_write(ap);
+       return error;
+}
+
+/*
+ * Undo the per-iteration state of fuse_dio_write() after a failure:
+ * invalidate and release the buffer (if one is held), drop the temporary
+ * file obtained for the reopen hack (if any), and roll the node size back
+ * if it was extended for this iteration.
+ */
+static void
+fuse_dio_write_abort(struct fuse_node *fnp, struct buf *bp, struct file *fp,
+    bool fixsize, size_t oldsize)
+{
+       if (bp)
+               fuse_brelse(bp);
+       if (fp)
+               fdrop(fp);
+       fuse_fix_size(fnp, fixsize, oldsize);
+}
+
+/*
+ * Write through to the FUSE server, one FUSE_BLKSIZE-aligned block at a
+ * time.
+ *
+ * For each block the node is extended if the write goes past the current
+ * size, a buffer is obtained (read from the server first when only part of
+ * an existing block is overwritten), the caller's data is moved into it,
+ * and the same bytes are sent with FUSE_WRITE before the buffer is written
+ * out.  With IO_APPEND the write starts at the current node size.
+ *
+ * When there is no current process or the node has been marked closed, the
+ * vnode is temporarily re-opened around each block (XXX hack).
+ *
+ * On failure the buffer, the temporary file and any size extension made
+ * for the failing block are undone.  Returns 0 on success, EIO when the
+ * server replies with more read data than fits the block or acknowledges a
+ * different number of written bytes than requested, or the error of the
+ * failing step.
+ */
+int
+fuse_dio_write(struct vop_write_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       bool need_reopen = !curproc || fnp->closed; /* XXX */
+       int kflags = 0;
+       int error = 0;
+
+       if (ap->a_ioflag & IO_APPEND)
+               uio->uio_offset = fnp->size;
+
+       while (uio->uio_resid > 0) {
+               struct file *fp = NULL; /* non-NULL only while reopened */
+               struct buf *bp = NULL;  /* non-NULL only while held */
+               struct fuse_ipc *fip;
+               struct fuse_read_in *fri;
+               struct fuse_write_in *fwi;
+               struct fuse_write_out *fwo;
+               off_t base_offset, buf_offset;
+               size_t len, oldsize, out_size;
+               uint64_t fh;
+               bool fixsize = false;
+               bool need_read = false;
+
+               /* Prefer the fh of the caller's file over the per-node fh. */
+               fh = fuse_nfh(VTOI(vp));
+               if (ap->a_fp)
+                       fh = fuse_fh(ap->a_fp);
+
+               buf_offset = (off_t)uio->uio_offset & FUSE_BLKMASK64;
+               base_offset = (off_t)uio->uio_offset - buf_offset;
+
+               fuse_dbg("uio_offset=%ju uio_resid=%ju base_offset=%ju "
+                   "buf_offset=%ju\n",
+                   uio->uio_offset, uio->uio_resid, base_offset, buf_offset);
+
+               oldsize = fnp->size;
+               len = FUSE_BLKSIZE - buf_offset;
+               if (len > uio->uio_resid)
+                       len = uio->uio_resid;
+               if (uio->uio_offset + len > fnp->size) {
+                       /* XXX trivial flag */
+                       error = fuse_node_truncate(fnp, fnp->size,
+                           uio->uio_offset + len);
+                       if (error)
+                               break;
+                       fixsize = true;
+                       kflags |= NOTE_EXTEND;
+               }
+               fuse_dbg("size=%ju len=%ju\n", fnp->size, len);
+
+               if (uio->uio_segflg == UIO_NOCOPY) {
+                       bp = getblk(ap->a_vp, base_offset, FUSE_BLKSIZE,
+                           GETBLK_BHEAVY, 0);
+                       if (!(bp->b_flags & B_CACHE)) {
+                               bqrelse(bp);
+                               /* Released; bread() below re-acquires it. */
+                               bp = NULL;
+                               need_read = true;
+                       }
+               } else if (!buf_offset && uio->uio_resid >= FUSE_BLKSIZE) {
+                       /* Whole block is overwritten, no need to read it. */
+                       bp = getblk(ap->a_vp, base_offset, FUSE_BLKSIZE,
+                           GETBLK_BHEAVY, 0);
+                       if (!(bp->b_flags & B_CACHE))
+                               vfs_bio_clrbuf(bp);
+               } else if (base_offset >= fnp->size) {
+                       /* Block lies entirely past the node size. */
+                       bp = getblk(ap->a_vp, base_offset, FUSE_BLKSIZE,
+                           GETBLK_BHEAVY, 0);
+                       vfs_bio_clrbuf(bp);
+               } else {
+                       need_read = true;
+               }
+
+               if (bp)
+                       fuse_dbg("b_loffset=%ju b_bcount=%d b_flags=%x\n",
+                           bp->b_loffset, bp->b_bcount, bp->b_flags);
+
+               if (need_reopen) {
+                       error = falloc(NULL, &fp, NULL);
+                       if (error) {
+                               fuse_dio_write_abort(fnp, bp, fp, fixsize,
+                                   oldsize);
+                               break;
+                       }
+                       /* XXX can panic at vref() in vop_stdopen() */
+                       error = VOP_OPEN(vp, FREAD | FWRITE, ap->a_cred, fp);
+                       if (error) {
+                               fuse_dio_write_abort(fnp, bp, fp, fixsize,
+                                   oldsize);
+                               break;
+                       }
+               }
+
+               if (need_read) {
+                       error = bread(ap->a_vp, base_offset, FUSE_BLKSIZE, &bp);
+                       KKASSERT(!error);
+
+                       fuse_dbg("b_loffset=%ju b_bcount=%d b_flags=%x\n",
+                           bp->b_loffset, bp->b_bcount, bp->b_flags);
+
+                       if (bp->b_loffset + (buf_offset + len) > oldsize) {
+                               memset(bp->b_data, 0, FUSE_BLKSIZE); /* XXX */
+                               goto skip; /* prevent EBADF */
+                       }
+
+                       fip = fuse_ipc_get(fmp, sizeof(*fri));
+                       fri = fuse_ipc_fill(fip, FUSE_READ, fnp->ino,
+                           ap->a_cred);
+                       fri->offset = bp->b_loffset;
+                       fri->size = buf_offset + len;
+                       if (need_reopen)
+                               fri->fh = fuse_nfh(VTOI(vp));
+                       else
+                               fri->fh = fh;
+
+                       fuse_dbg("fuse_read_in offset=%ju size=%u fh=%jx\n",
+                           fri->offset, fri->size, fri->fh);
+
+                       error = fuse_ipc_tx(fip);
+                       if (error) {
+                               /* fuse_ipc_tx() has already released fip. */
+                               fuse_dio_write_abort(fnp, bp, fp, fixsize,
+                                   oldsize);
+                               break;
+                       }
+
+                       /*
+                        * The payload comes from the userspace server; never
+                        * copy more than the buffer can hold.
+                        */
+                       out_size = fuse_out_data_size(fip);
+                       if (out_size > (size_t)bp->b_bcount) {
+                               fuse_ipc_put(fip);
+                               error = EIO;
+                               fuse_dio_write_abort(fnp, bp, fp, fixsize,
+                                   oldsize);
+                               break;
+                       }
+                       memcpy(bp->b_data, fuse_out_data(fip), out_size);
+                       fuse_ipc_put(fip);
+               }
+skip:
+               error = uiomovebp(bp, bp->b_data + buf_offset, len, uio);
+               if (error) {
+                       /* As before, the buffer is requeued, not invalidated. */
+                       bqrelse(bp);
+                       fuse_dio_write_abort(fnp, NULL, fp, fixsize, oldsize);
+                       break;
+               }
+               kflags |= NOTE_WRITE;
+
+               fip = fuse_ipc_get(fmp, sizeof(*fwi) + len);
+               fwi = fuse_ipc_fill(fip, FUSE_WRITE, fnp->ino, ap->a_cred);
+               fwi->offset = bp->b_loffset + buf_offset;
+               fwi->size = len;
+               if (need_reopen)
+                       fwi->fh = fuse_nfh(VTOI(vp));
+               else
+                       fwi->fh = fh;
+               memcpy((void*)(fwi + 1), bp->b_data + buf_offset, len);
+
+               fuse_dbg("fuse_write_in offset=%ju size=%u fh=%jx\n",
+                   fwi->offset, fwi->size, fwi->fh);
+
+               error = fuse_ipc_tx(fip);
+               if (error) {
+                       /* fuse_ipc_tx() has already released fip. */
+                       fuse_dio_write_abort(fnp, bp, fp, fixsize, oldsize);
+                       break;
+               }
+               fwo = fuse_out_data(fip);
+               if (fwo->size != len) {
+                       /*
+                        * The uio has already advanced by len, so a reply
+                        * for a different length must not look like success.
+                        */
+                       fuse_ipc_put(fip);
+                       error = EIO;
+                       fuse_dio_write_abort(fnp, bp, fp, fixsize, oldsize);
+                       break;
+               }
+               fuse_ipc_put(fip);
+
+               if (need_reopen) {
+                       error = fdrop(fp); /* calls VOP_CLOSE() */
+                       if (error) {
+                               /* fp is gone already; only undo the rest. */
+                               fuse_dio_write_abort(fnp, bp, NULL, fixsize,
+                                   oldsize);
+                               break;
+                       }
+               }
+
+               error = bwrite(bp);
+               KKASSERT(!error);
+       }
+
+       fuse_knote(ap->a_vp, kflags);
+
+       fuse_dbg("uio_offset=%ju uio_resid=%ju error=%d done\n",
+           uio->uio_offset, uio->uio_resid, error);
+
+       return error;
+}
diff --git a/sys/vfs/fuse/fuse_ipc.c b/sys/vfs/fuse/fuse_ipc.c
new file mode 100644 (file)
index 0000000..84102ce
--- /dev/null
@@ -0,0 +1,259 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+#include <sys/signalvar.h>
+#include <sys/kern_syscall.h>
+
+/* Malloc type for request/reply payloads (see fuse_buf_alloc()). */
+static MALLOC_DEFINE(M_FUSE_BUF, "fuse_buf", "FUSE buf");
+/* Malloc type for struct fuse_ipc objects held in the cache below. */
+static MALLOC_DEFINE(M_FUSE_IPC, "fuse_ipc", "FUSE ipc");
+
+/*
+ * Object cache of struct fuse_ipc.  Created in fuse_ipc_init() and
+ * destroyed in fuse_ipc_cleanup().
+ */
+static struct objcache *fuse_ipc_objcache = NULL;
+static struct objcache_malloc_args fuse_ipc_args = {
+       sizeof(struct fuse_ipc), M_FUSE_IPC,
+};
+
+/*
+ * Block every signal except SIGKILL for the current process and store the
+ * previous mask in *oldset.
+ *
+ * Returns the result of kern_sigprocmask() (asserted to be 0), or -1
+ * without touching *oldset when there is no current process.
+ */
+static int
+fuse_block_sigs(sigset_t *oldset)
+{
+       sigset_t newset;
+       int error;
+
+       if (!curproc)
+               return -1;
+
+       SIGFILLSET(newset);
+       SIGDELSET(newset, SIGKILL);
+
+       error = kern_sigprocmask(SIG_BLOCK, &newset, oldset);
+       KKASSERT(!error);
+
+       return error;
+}
+
+/*
+ * Restore the signal mask saved by fuse_block_sigs().
+ *
+ * Returns the result of kern_sigprocmask() (asserted to be 0), or -1
+ * without reading *oldset when there is no current process.
+ */
+static int
+fuse_restore_sigs(sigset_t *oldset)
+{
+       int error;
+
+       if (!curproc)
+               return -1;
+
+       error = kern_sigprocmask(SIG_SETMASK, oldset, NULL);
+       KKASSERT(!error);
+
+       return error;
+}
+
+/*
+ * Give fbp a freshly allocated, zero-filled payload of len bytes.
+ * Any payload fbp referenced before is not released here.
+ */
+void
+fuse_buf_alloc(struct fuse_buf *fbp, size_t len)
+{
+       fbp->len = len;
+       fbp->buf = kmalloc(len, M_FUSE_BUF, M_WAITOK | M_ZERO);
+       KKASSERT(fbp->buf);
+}
+
+/*
+ * Release the payload of fbp, if any, and reset it to the empty state
+ * (buf == NULL, len == 0).  Safe to call on an already empty buffer.
+ */
+void
+fuse_buf_free(struct fuse_buf *fbp)
+{
+       fbp->len = 0;
+
+       if (fbp->buf == NULL)
+               return;
+
+       kfree(fbp->buf, M_FUSE_BUF);
+       fbp->buf = NULL;
+}
+
+/*
+ * Obtain a new IPC object for a request to the server of fmp.
+ *
+ * The object starts with one reference, a mount-wide unique id, a request
+ * buffer large enough for a fuse_in_header followed by len payload bytes,
+ * and no reply buffer.  Release it with fuse_ipc_put().
+ */
+struct fuse_ipc*
+fuse_ipc_get(struct fuse_mount *fmp, size_t len)
+{
+       struct fuse_ipc *fip = objcache_get(fuse_ipc_objcache, M_WAITOK);
+       size_t reqlen = sizeof(struct fuse_in_header) + len;
+
+       refcount_init(&fip->refcnt, 1);
+       fip->done = 0;
+       fip->fmp = fmp;
+       fip->unique = atomic_fetchadd_long(&fmp->unique, 1);
+
+       fip->reply.buf = NULL;
+       fuse_buf_alloc(&fip->request, reqlen);
+
+       return fip;
+}
+
+/*
+ * Drop one reference on fip.  When the last reference goes away, both
+ * payload buffers are freed and the object returns to the cache.
+ */
+void
+fuse_ipc_put(struct fuse_ipc *fip)
+{
+       if (!refcount_release(&fip->refcnt))
+               return;
+
+       fuse_buf_free(&fip->request);
+       fuse_buf_free(&fip->reply);
+       objcache_put(fuse_ipc_objcache, fip);
+}
+
+/*
+ * Take fip off the mount's request and reply queues, whichever of them it
+ * is still linked on, under the mount's ipc_lock.
+ */
+static void
+fuse_ipc_remove(struct fuse_ipc *fip)
+{
+       struct fuse_mount *fmp = fip->fmp;
+       struct fuse_ipc *cur, *next;
+
+       mtx_lock(&fmp->ipc_lock);
+
+       TAILQ_FOREACH_MUTABLE(cur, &fmp->request_head, request_entry, next) {
+               if (cur != fip)
+                       continue;
+               TAILQ_REMOVE(&fmp->request_head, cur, request_entry);
+               break;
+       }
+
+       TAILQ_FOREACH_MUTABLE(cur, &fmp->reply_head, reply_entry, next) {
+               if (cur != fip)
+                       continue;
+               TAILQ_REMOVE(&fmp->reply_head, cur, reply_entry);
+               break;
+       }
+
+       mtx_unlock(&fmp->ipc_lock);
+}
+
+/*
+ * Fill in the request header of fip for opcode op on inode ino.
+ *
+ * uid and gid are taken from cred, or from the current thread's
+ * credentials when cred is NULL; pid is that of the current process, or 0
+ * when the current thread has none.
+ *
+ * Returns a pointer to the request payload that follows the header.
+ */
+void*
+fuse_ipc_fill(struct fuse_ipc *fip, int op, uint64_t ino, struct ucred *cred)
+{
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+
+       if (cred == NULL)
+               cred = td->td_ucred;
+
+       fuse_fill_in_header(fuse_in(fip), fuse_in_size(fip), op, fip->unique,
+           ino, cred->cr_uid, cred->cr_rgid, p ? p->p_pid : 0);
+
+       fuse_dbgipc(fip, 0, "");
+
+       return fuse_in_data(fip);
+}
+
+/*
+ * Wait until fip has been replied to.
+ *
+ * Returns 0 once the reply has arrived, ENOTCONN if the mount is found
+ * dead before or after sleeping, ETIMEDOUT if no reply arrived after the
+ * initial sleep plus six retries of 5 * hz each, or the tsleep() error for
+ * any other wakeup reason.  On every error return fip is marked replied;
+ * on ETIMEDOUT and other tsleep() errors it is also taken off the mount's
+ * queues.
+ */
 static int
 fuse_ipc_wait(struct fuse_ipc *fip)
 {
        sigset_t oldset;
        int error, retry = 0;
 
+       /* Nothing will ever answer on a dead mount. */
        if (fuse_test_dead(fip->fmp)) {
                KKASSERT(!fuse_ipc_test_replied(fip));
                fuse_ipc_set_replied(fip);
                return ENOTCONN;
        }
 
+       /* The reply may already be there by the time we get here. */
        if (fuse_ipc_test_replied(fip))
                return 0;
 again:
+       /* Sleep with everything but SIGKILL blocked (see fuse_block_sigs()). */
        fuse_block_sigs(&oldset);
        error = tsleep(fip, PCATCH, "ftxp", 5 * hz);
        fuse_restore_sigs(&oldset);
        if (!error)
                KKASSERT(fuse_ipc_test_replied(fip)); /* XXX */
 
        if (error == EWOULDBLOCK) {
+               /* The sleep timed out; retry unless the reply came in. */
                if (!fuse_ipc_test_replied(fip)) {
                        if (!retry)
                                fuse_print("timeout/retry\n");
                        if (retry++ < 6)
                                goto again;
                        fuse_print("timeout\n");
                        fuse_ipc_remove(fip);
                        fuse_ipc_set_replied(fip);
                        return ETIMEDOUT;
                } else
                        fuse_dbg("EWOULDBLOCK lost race\n");
        } else if (error) {
+               /* Any other tsleep() failure aborts the request. */
                fuse_print("error=%d\n", error);
                fuse_ipc_remove(fip);
                fuse_ipc_set_replied(fip);
                return error;
        }
 
+       /* The mount may have died while we slept. */
        if (fuse_test_dead(fip->fmp)) {
                KKASSERT(fuse_ipc_test_replied(fip));
                return ENOTCONN;
        }
 
        return 0;
 }
+
+/*
+ * Send the request in fip to the server and wait for its reply.
+ *
+ * On success 0 is returned and the caller still owns fip, which now holds
+ * the reply and must be released with fuse_ipc_put().  On failure fip has
+ * already been released and a positive errno is returned: ENOTCONN for a
+ * dead mount, the fuse_ipc_wait() error, or the error reported in the
+ * reply header with its sign made positive.
+ */
+int
+fuse_ipc_tx(struct fuse_ipc *fip)
+{
+       struct fuse_mount *fmp = fip->fmp;
+       struct fuse_out_header *ohd;
+       int error;
+
+       if (fuse_test_dead(fmp)) {
+               fuse_ipc_put(fip);
+               return ENOTCONN;
+       }
+
+       /* Queue the request and let the server side know about it. */
+       mtx_lock(&fmp->mnt_lock);
+
+       mtx_lock(&fmp->ipc_lock);
+       TAILQ_INSERT_TAIL(&fmp->reply_head, fip, reply_entry);
+       TAILQ_INSERT_TAIL(&fmp->request_head, fip, request_entry);
+       mtx_unlock(&fmp->ipc_lock);
+
+       wakeup(fmp);
+       KNOTE(&fmp->kq.ki_note, 0);
+
+       mtx_unlock(&fmp->mnt_lock);
+
+       error = fuse_ipc_wait(fip);
+       KKASSERT(fuse_ipc_test_replied(fip));
+       if (error != 0) {
+               fuse_dbgipc(fip, error, "ipc_wait");
+               fuse_ipc_put(fip);
+               return error;
+       }
+
+       ohd = fuse_out(fip);
+       KKASSERT(ohd);
+       error = ohd->error;
+       if (error == 0) {
+               fuse_dbgipc(fip, 0, "done");
+               return 0;
+       }
+
+       fuse_dbgipc(fip, error, "ipc_error");
+       fuse_ipc_put(fip);
+
+       /* Negative values in the reply header are flipped to positive. */
+       return (error < 0) ? -error : error;
+}
+
+/* Create the object cache that backs struct fuse_ipc allocations. */
+void
+fuse_ipc_init(void)
+{
+       struct objcache *cache;
+
+       cache = objcache_create("fuse_ipc", 0, 0, NULL, NULL, NULL,
+           objcache_malloc_alloc_zero, objcache_malloc_free, &fuse_ipc_args);
+       fuse_ipc_objcache = cache;
+}
+
+/* Destroy the object cache created by fuse_ipc_init(). */
+void
+fuse_ipc_cleanup(void)
+{
+       struct objcache *cache = fuse_ipc_objcache;
+
+       objcache_destroy(cache);
+}
diff --git a/sys/vfs/fuse/fuse_mount.h b/sys/vfs/fuse/fuse_mount.h
new file mode 100644 (file)
index 0000000..fd9e7fe
--- /dev/null
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FUSE_MOUNT_H
+#define FUSE_MOUNT_H
+
+#include <sys/mount.h>
+
+/*
+ * Mount option bits; each value is a distinct single bit so they can be
+ * OR'ed together.
+ * NOTE(review): presumably stored in fuse_mount_info.flags -- confirm
+ * against the mount code.
+ */
+#define FUSE_MOUNT_DEFAULT_PERMISSIONS 0x01
+#define FUSE_MOUNT_ALLOW_OTHER         0x02
+#define FUSE_MOUNT_MAX_READ            0x04
+#define FUSE_MOUNT_SUBTYPE             0x08
+
+/*
+ * Mount-time parameters for a FUSE mount.
+ * NOTE(review): field meanings below are inferred from their names and the
+ * FUSE_MOUNT_* bits above -- confirm against the code that fills and
+ * consumes this structure.
+ */
+struct fuse_mount_info {
+       int flags;              /* presumably FUSE_MOUNT_* bits */
+       int fd;                 /* presumably the FUSE device descriptor */
+       int max_read;           /* presumably used with FUSE_MOUNT_MAX_READ */
+       const char *subtype;    /* presumably used with FUSE_MOUNT_SUBTYPE */
+       const char *from;       /* presumably the mount source string */
+};
+
+#endif /* !FUSE_MOUNT_H */
diff --git a/sys/vfs/fuse/fuse_node.c b/sys/vfs/fuse/fuse_node.c
new file mode 100644 (file)
index 0000000..10e0e42
--- /dev/null
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+/* Malloc type for struct fuse_node objects held in the cache below. */
+static MALLOC_DEFINE(M_FUSE_NODE, "fuse_node", "FUSE node");
+
+/* Object cache of struct fuse_node; created in fuse_node_init(). */
+static struct objcache *fuse_node_objcache = NULL;
+static struct objcache_malloc_args fuse_node_args = {
+       sizeof(struct fuse_node), M_FUSE_NODE,
+};
+
+/* Malloc type for struct fuse_dent objects held in the cache below. */
+static MALLOC_DEFINE(M_FUSE_DENT, "fuse_dent", "FUSE dent");
+
+/* Object cache of struct fuse_dent; created in fuse_node_init(). */
+static struct objcache *fuse_dent_objcache = NULL;
+static struct objcache_malloc_args fuse_dent_args = {
+       sizeof(struct fuse_dent), M_FUSE_DENT,
+};
+
+/*
+ * Ordering function for the directory entry tree: entries are compared by
+ * name with strcmp().
+ */
+static int
+fuse_dent_cmp(struct fuse_dent *p1, struct fuse_dent *p2)
+{
+       const char *lhs = p1->name;
+       const char *rhs = p2->name;
+
+       return strcmp(lhs, rhs);
+}
+
+/*
+ * Red-black tree of directory entries ordered by fuse_dent_cmp().
+ * Both macros name the same linkage field of struct fuse_dent.
+ */
+RB_PROTOTYPE_STATIC(fuse_dent_tree, fuse_dent, dent_entry, fuse_dent_cmp);
+RB_GENERATE_STATIC(fuse_dent_tree, fuse_dent, dent_entry, fuse_dent_cmp);
+
+/*
+ * Allocate and initialise a node for inode ino of type vtyp on mount fmp
+ * and return it through *fnpp.
+ *
+ * The node starts out zeroed: no vnode, no parent, zero link count, size,
+ * lookup count and fh, and not closed.  Its lock and (empty) directory
+ * entry tree are ready for use.
+ */
+void
+fuse_node_new(struct fuse_mount *fmp, uint64_t ino, enum vtype vtyp,
+    struct fuse_node **fnpp)
+{
+       struct fuse_node *fnp;
+
+       fnp = objcache_get(fuse_node_objcache, M_WAITOK);
+       KKASSERT(fnp);
+
+       /*
+        * Cached objects may be recycled, so start from a clean slate.
+        * This covers vp, pfnp, nlink, size, nlookup, fh and closed.
+        */
+       memset(fnp, 0, sizeof(*fnp));
+
+       fnp->fmp = fmp;
+       fnp->ino = ino;
+       fnp->type = vtyp;
+
+       mtx_init(&fnp->node_lock, "fuse_node_lock");
+       RB_INIT(&fnp->dent_head);
+
+       *fnpp = fnp;
+       KKASSERT(*fnpp);
+}
+
+/*
+ * Tear down fnp and return it to the node cache.
+ *
+ * The entry that refers to fnp in its parent directory (if it has a parent
+ * and the entry is still there) is detached and freed, as are all entries
+ * of fnp itself when it is a directory.  If a vnode is attached, the
+ * association is severed in both directions.  A node that never got a
+ * vnode, e.g. because fuse_node_vn() failed, is handled as well.
+ */
+void
+fuse_node_free(struct fuse_node *fnp)
+{
+       struct fuse_node *dfnp = fnp->pfnp;
+       struct fuse_dent *fep;
+
+       fuse_dbg("free ino=%ju\n", fnp->ino);
+
+       if (dfnp) {
+               mtx_lock(&dfnp->node_lock);
+               RB_FOREACH(fep, fuse_dent_tree, &dfnp->dent_head) {
+                       if (fep->fnp == fnp) {
+                               fuse_dent_detach(dfnp, fep);
+                               fuse_dent_free(fep);
+                               /* Tree was modified; stop iterating. */
+                               break;
+                       }
+               }
+               mtx_unlock(&dfnp->node_lock);
+       }
+
+       mtx_lock(&fnp->node_lock);
+       if (fnp->type == VDIR) {
+               while ((fep = RB_ROOT(&fnp->dent_head))) {
+                       fuse_dent_detach(fnp, fep);
+                       fuse_dent_free(fep);
+               }
+       }
+       /* The node may not have a vnode yet (or any more). */
+       if (fnp->vp) {
+               fnp->vp->v_data = NULL;
+               fnp->vp = NULL;
+       }
+       fnp->nlink = -123; /* debug */
+       mtx_unlock(&fnp->node_lock);
+
+       objcache_put(fuse_node_objcache, fnp);
+}
+
+/*
+ * Allocate a directory entry called name that refers to fnp and return it
+ * through *fepp.
+ *
+ * With namelen >= 0 at most namelen bytes of name are copied, otherwise
+ * name is copied as a NUL-terminated string.  The link count of fnp is
+ * incremented.  The entry is not inserted into any directory here; see
+ * fuse_dent_attach().
+ */
+void
+fuse_dent_new(struct fuse_node *fnp, const char *name, int namelen,
+    struct fuse_dent **fepp)
+{
+       struct fuse_dent *fep;
+
+       fep = objcache_get(fuse_dent_objcache, M_WAITOK);
+       KKASSERT(fep);
+
+       fep->name = (namelen >= 0) ?
+           kstrndup(name, namelen, M_TEMP) : kstrdup(name, M_TEMP);
+       KKASSERT(fep->name);
+       fep->fnp = fnp;
+
+       KASSERT(fnp->nlink >= 0, ("new ino=%ju nlink=%d dent=\"%s\"",
+           fnp->ino, fnp->nlink, fep->name));
+       KKASSERT(fnp->nlink < LINK_MAX);
+       fnp->nlink++;
+
+       *fepp = fep;
+       KKASSERT(*fepp);
+}
+
+/*
+ * Free a directory entry: release its name, decrement the link count of
+ * the node it refers to, and return the entry to the cache.  The entry is
+ * not removed from any directory here; see fuse_dent_detach().
+ */
+void
+fuse_dent_free(struct fuse_dent *fep)
+{
+       struct fuse_node *fnp = fep->fnp;
+
+       fuse_dbg("free dent=\"%s\"\n", fep->name);
+
+       KASSERT(fnp->nlink > 0, ("free ino=%ju nlink=%d dent=\"%s\"",
+           fnp->ino, fnp->nlink, fep->name));
+       KKASSERT(fnp->nlink <= LINK_MAX);
+       fnp->nlink--;
+
+       if (fep->name != NULL) {
+               kfree(fep->name, M_TEMP);
+               fep->name = NULL;
+       }
+       fep->fnp = NULL;
+
+       objcache_put(fuse_dent_objcache, fep);
+}
+
+/*
+ * Insert fep into the entry tree of directory dfnp.
+ * dfnp must be a directory and its node_lock must be held exclusively.
+ */
+void
+fuse_dent_attach(struct fuse_node *dfnp, struct fuse_dent *fep)
+{
+       struct fuse_dent_tree *head;
+
+       KKASSERT(dfnp);
+       KKASSERT(dfnp->type == VDIR);
+       KKASSERT(mtx_islocked_ex(&dfnp->node_lock));
+
+       head = &dfnp->dent_head;
+       RB_INSERT(fuse_dent_tree, head, fep);
+}
+
+/*
+ * Remove fep from the entry tree of directory dfnp.
+ * dfnp must be a directory and its node_lock must be held exclusively.
+ */
+void
+fuse_dent_detach(struct fuse_node *dfnp, struct fuse_dent *fep)
+{
+       struct fuse_dent_tree *head;
+
+       KKASSERT(dfnp);
+       KKASSERT(dfnp->type == VDIR);
+       KKASSERT(mtx_islocked_ex(&dfnp->node_lock));
+
+       head = &dfnp->dent_head;
+       RB_REMOVE(fuse_dent_tree, head, fep);
+}
+
+/*
+ * Look up the entry called name in directory dfnp.
+ *
+ * With namelen >= 0 at most namelen bytes of name are used, otherwise name
+ * is taken as a NUL-terminated string.  Returns 0 and stores the entry in
+ * *fepp (when fepp is not NULL) if found, ENOENT otherwise.
+ */
+int
+fuse_dent_find(struct fuse_node *dfnp, const char *name, int namelen,
+    struct fuse_dent **fepp)
+{
+       struct fuse_dent *fep, find;
+       int error = ENOENT;
+
+       /* Only the name of the search key is ever looked at. */
+       find.name = (namelen >= 0) ?
+           kstrndup(name, namelen, M_TEMP) : kstrdup(name, M_TEMP);
+       KKASSERT(find.name);
+
+       fep = RB_FIND(fuse_dent_tree, &dfnp->dent_head, &find);
+       if (fep == NULL) {
+               fuse_dbg("dent=\"%s\" not found\n", find.name);
+       } else {
+               error = 0;
+               if (fepp)
+                       *fepp = fep;
+       }
+
+       kfree(find.name, M_TEMP);
+
+       return error;
+}
+
+/*
+ * Create a node for inode ino named name in directory dfnp, together with
+ * its directory entry and vnode.
+ *
+ * Returns 0 with the exclusively locked vnode in *vpp, EINVAL for block,
+ * character and FIFO types, EEXIST if dfnp already has an entry of that
+ * name, or the error from fuse_node_vn(), in which case the new entry and
+ * node are torn down again.
+ */
+int
+fuse_alloc_node(struct fuse_node *dfnp, uint64_t ino, const char *name,
+    int namelen, enum vtype vtyp, struct vnode **vpp)
+{
+       struct fuse_node *fnp = NULL;
+       struct fuse_dent *fep = NULL;
+       int error;
+
+       switch (vtyp) {
+       case VBLK:
+       case VCHR:
+       case VFIFO:
+               return EINVAL;
+       default:
+               break;
+       }
+
+       mtx_lock(&dfnp->node_lock);
+       error = fuse_dent_find(dfnp, name, namelen, &fep);
+       if (error == 0) {
+               mtx_unlock(&dfnp->node_lock);
+               return EEXIST;
+       }
+       if (error == ENOENT) {
+               /* Link the new node and its entry below the directory. */
+               fuse_node_new(dfnp->fmp, ino, vtyp, &fnp);
+               mtx_lock(&fnp->node_lock);
+               fnp->pfnp = dfnp;
+               fuse_dent_new(fnp, name, namelen, &fep);
+               fuse_dent_attach(dfnp, fep);
+               mtx_unlock(&fnp->node_lock);
+       } else {
+               KKASSERT(0);
+       }
+       mtx_unlock(&dfnp->node_lock);
+
+       error = fuse_node_vn(fnp, LK_EXCLUSIVE, vpp);
+       if (error != 0) {
+               /* Undo the entry first, then the node itself. */
+               mtx_lock(&dfnp->node_lock);
+               fuse_dent_detach(dfnp, fep);
+               fuse_dent_free(fep);
+               mtx_unlock(&dfnp->node_lock);
+               fuse_node_free(fnp);
+               return error;
+       }
+       KKASSERT(*vpp);
+
+       fuse_dbg("fnp=%p ino=%ju dent=\"%s\"\n", fnp, fnp->ino, fep->name);
+
+       return 0;
+}
+
+/*
+ * Return the vnode of fnp in *vpp, creating it if the node has none yet.
+ *
+ * If a vnode is already attached it is obtained with vget() using flags
+ * (retrying from the top if vget() fails).  Otherwise a new vnode is
+ * created, typed after the node and attached to it.
+ *
+ * Returns 0 on success or the error from getnewvnode().
+ */
 int
 fuse_node_vn(struct fuse_node *fnp, int flags, struct vnode **vpp)
 {
        struct mount *mp = fnp->fmp->mp;
        struct vnode *vp;
        int error;
 retry:
        mtx_lock(&fnp->node_lock);
        vp = fnp->vp;
        if (vp) {
+               /* Hold the vnode across dropping node_lock and vget(). */
                vhold(vp);
                mtx_unlock(&fnp->node_lock);
 
                error = vget(vp, flags | LK_RETRY);
                if (error) {
                        vdrop(vp);
                        goto retry;
                }
                vdrop(vp);
                *vpp = vp;
                return 0;
        }
        mtx_unlock(&fnp->node_lock);
 
+       /* No vnode yet: create one and tie it to the node. */
        error = getnewvnode(VT_FUSE, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
        if (error)
                return error;
        vp->v_type = fnp->type;
        vp->v_data = fnp;
 
        switch (vp->v_type) {
        case VREG:
                vinitvmio(vp, fnp->size, FUSE_BLKSIZE, -1);
                break;
        case VDIR:
                break;
        case VBLK:
        case VCHR:
+               /* Not expected here; fuse_alloc_node() rejects these types. */
                KKASSERT(0);
                vp->v_ops = &mp->mnt_vn_spec_ops;
                addaliasu(vp, umajor(0), uminor(0)); /* XXX CUSE */
                break;
        case VLNK:
                break;
        case VSOCK:
                break;
        case VFIFO:
+               /* Not expected here either; asserts, then falls through. */
                KKASSERT(0);
        case VDATABASE:
                break;
        default:
                KKASSERT(0);
        }
 
        KKASSERT(vn_islocked(vp) == LK_EXCLUSIVE);
+       /* node_lock is not held here; a concurrent attach trips this. */
        KASSERT(!fnp->vp, ("lost race"));
        fnp->vp = vp;
        *vpp = vp;
 
        return 0;
 }
+
+/*
+ * Change the size of fnp from oldsize to newsize.
+ *
+ * The size recorded on the node and in its cached attributes is updated
+ * first; the buffers of the node's vnode are then truncated (newsize <
+ * oldsize) or extended (otherwise).  Returns the result of that buffer
+ * operation; the recorded size is not restored if it fails.
+ */
+int
+fuse_node_truncate(struct fuse_node *fnp, size_t oldsize, size_t newsize)
+{
+       struct vnode *vp = fnp->vp;
+
+       fuse_dbg("ino=%ju update size %ju -> %ju\n",
+           fnp->ino, oldsize, newsize);
+
+       fnp->attr.va_size = fnp->size = newsize;
+
+       if (newsize >= oldsize)
+               return nvextendbuf(vp, oldsize, newsize, FUSE_BLKSIZE,
+                   FUSE_BLKSIZE, -1, -1, 0);
+
+       return nvtruncbuf(vp, newsize, FUSE_BLKSIZE, -1, 0);
+}
+
+/* Create the object caches for nodes and directory entries. */
+void
+fuse_node_init(void)
+{
+       struct objcache *nodes, *dents;
+
+       nodes = objcache_create("fuse_node", 0, 0, NULL, NULL, NULL,
+           objcache_malloc_alloc_zero, objcache_malloc_free, &fuse_node_args);
+       fuse_node_objcache = nodes;
+
+       dents = objcache_create("fuse_dent", 0, 0, NULL, NULL, NULL,
+           objcache_malloc_alloc_zero, objcache_malloc_free, &fuse_dent_args);
+       fuse_dent_objcache = dents;
+}
+
+/* Destroy the object caches created by fuse_node_init(). */
+void
+fuse_node_cleanup(void)
+{
+       struct objcache *nodes = fuse_node_objcache;
+       struct objcache *dents = fuse_dent_objcache;
+
+       objcache_destroy(nodes);
+       objcache_destroy(dents);
+}
diff --git a/sys/vfs/fuse/fuse_util.c b/sys/vfs/fuse/fuse_util.c
new file mode 100644 (file)
index 0000000..edde193
--- /dev/null
@@ -0,0 +1,339 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+/*
+ * Print len bytes starting at p to the kernel console as two-digit
+ * uppercase hex values separated by spaces, 32 values per line, followed
+ * by a final newline.  Does nothing unless fuse_debug is non-zero.
+ */
+void
+fuse_hexdump(const char *p, size_t len)
+{
+       size_t i;
+
+       if (!fuse_debug)
+               return;
+
+       /* Index with size_t so that a large len is not truncated to int. */
+       for (i = 0; i < len; i++) {
+               /* Mask to one byte: plain char may be signed. */
+               kprintf("%02X ", p[i] & 0xff);
+               if ((i + 1) % 32 == 0)
+                       kprintf("\n");
+       }
+       kprintf("\n");
+}
+
+/*
+ * Store the given values into the corresponding fields of a FUSE request
+ * header.  Only the seven fields named by the parameters are written; any
+ * other member of *ihd is left as it was.
+ */
+void
+fuse_fill_in_header(struct fuse_in_header *ihd,
+    uint32_t len, uint32_t opcode, uint64_t unique, uint64_t nodeid,
+    uint32_t uid, uint32_t gid, uint32_t pid)
+{
+       /* what is being sent */
+       ihd->len = len;
+       ihd->opcode = opcode;
+       ihd->unique = unique;
+       ihd->nodeid = nodeid;
+
+       /* on whose behalf */
+       ihd->uid = uid;
+       ihd->gid = gid;
+       ihd->pid = pid;
+}
+
+/*
+ * Send FUSE_FORGET for inode number ino with the given lookup count.
+ * nlookup must be greater than zero (asserted).
+ *
+ * Returns 0 on success, or the error from fuse_ipc_tx().
+ *
+ * NOTE(review): the ipc is only released here on success; presumably
+ * fuse_ipc_tx() disposes of it when it fails - confirm.
+ */
+int
+fuse_forget_node(struct fuse_mount *fmp, uint64_t ino, uint64_t nlookup,
+    struct ucred *cred)
+{
+       struct fuse_forget_in *ffi;
+       struct fuse_ipc *fip;
+       int error;
+
+       KKASSERT(nlookup > 0);
+
+       fip = fuse_ipc_get(fmp, sizeof(*ffi));
+       ffi = fuse_ipc_fill(fip, FUSE_FORGET, ino, cred);
+       ffi->nlookup = nlookup;
+
+       error = fuse_ipc_tx(fip);
+       if (error == 0)
+               fuse_ipc_put(fip);
+
+       return error;
+}
+
+/*
+ * Check that the payload length of a reply is acceptable for the opcode
+ * of the request it answers.
+ *
+ * ihd is the request header; for FUSE_READ and FUSE_READDIR the
+ * struct fuse_read_in that immediately follows it supplies the upper bound.
+ * ohd is the reply header; the payload length is ohd->len minus the size of
+ * struct fuse_out_header.
+ *
+ * Returns 0 if the length is acceptable and -1 otherwise.  Opcodes in the
+ * "always rejected" group below never pass, and an opcode that is not
+ * listed at all goes to fuse_panic().
+ *
+ * Ignore FUSE_COMPAT_XXX which seem to exist for backward compatibility
+ * for ancient versions of FUSE protocol.
+ */
+int
+fuse_audit_length(struct fuse_in_header *ihd, struct fuse_out_header *ohd)
+{
+       /*
+        * A reply shorter than its own header can never be valid.  Detect
+        * it up front: the unsigned subtraction would otherwise wrap to a
+        * huge value and the "any length" opcodes would accept the reply.
+        */
+       bool truncated = (ohd->len < sizeof(struct fuse_out_header));
+       size_t len = truncated ? 0 : ohd->len - sizeof(struct fuse_out_header);
+       bool res = false;
+
+       switch (ihd->opcode) {
+       /* reply is exactly one struct fuse_entry_out */
+       case FUSE_LOOKUP:
+       case FUSE_SYMLINK:
+       case FUSE_MKNOD:
+       case FUSE_MKDIR:
+       case FUSE_LINK:
+               res = (len == sizeof(struct fuse_entry_out));
+               break;
+       /* reply is exactly one struct fuse_attr_out */
+       case FUSE_GETATTR:
+       case FUSE_SETATTR:
+               res = (len == sizeof(struct fuse_attr_out));
+               break;
+       case FUSE_READLINK:
+               res = (len <= PAGE_SIZE);
+               break;
+       /* reply carries no payload */
+       case FUSE_UNLINK:
+       case FUSE_RMDIR:
+       case FUSE_RENAME:
+       case FUSE_RELEASE:
+       case FUSE_FSYNC:
+       case FUSE_SETXATTR:
+       case FUSE_REMOVEXATTR:
+       case FUSE_FLUSH:
+       case FUSE_RELEASEDIR:
+       case FUSE_FSYNCDIR:
+       case FUSE_ACCESS:
+       case FUSE_DESTROY:
+               res = (len == 0);
+               break;
+       /* reply is exactly one struct fuse_open_out */
+       case FUSE_OPEN:
+       case FUSE_OPENDIR:
+               res = (len == sizeof(struct fuse_open_out));
+               break;
+       /* reply may not exceed the size asked for in the request */
+       case FUSE_READ:
+       case FUSE_READDIR:
+               res = (len <= ((struct fuse_read_in*)(ihd + 1))->size);
+               break;
+       case FUSE_WRITE:
+               res = (len == sizeof(struct fuse_write_out));
+               break;
+       case FUSE_STATFS:
+               res = (len == sizeof(struct fuse_statfs_out));
+               break;
+       case FUSE_INIT:
+               res = (len == sizeof(struct fuse_init_out));
+               break;
+       case FUSE_CREATE:
+               res = (len == sizeof(struct fuse_entry_out) +
+                   sizeof(struct fuse_open_out));
+               break;
+       /* any payload length is accepted */
+       case FUSE_FORGET:
+       case FUSE_GETXATTR:
+       case FUSE_LISTXATTR:
+               res = true;
+               break;
+       /* always rejected */
+       case FUSE_GETLK:
+       case FUSE_SETLK:
+       case FUSE_SETLKW:
+       case FUSE_INTERRUPT:
+       case FUSE_BMAP:
+       case FUSE_IOCTL:
+       case FUSE_POLL:
+       case FUSE_NOTIFY_REPLY:
+       case FUSE_BATCH_FORGET:
+       case FUSE_FALLOCATE:
+       case FUSE_READDIRPLUS:
+       case FUSE_RENAME2:
+       case FUSE_LSEEK:
+       case FUSE_COPY_FILE_RANGE:
+               res = false;
+               break;
+       default:
+               fuse_panic("Invalid opcode %d", ihd->opcode);
+               break;
+       }
+
+       if (truncated || !res)
+               return -1;
+       return 0;
+}
+
+const char*
+fuse_get_ops(int op)
+{
+       switch (op) {
+       case FUSE_LOOKUP:
+               return "FUSE_LOOKUP";
+       case FUSE_FORGET:
+               return "FUSE_FORGET";
+       case FUSE_GETATTR:
+               return "FUSE_GETATTR";
+       case FUSE_SETATTR:
+               return "FUSE_SETATTR";
+       case FUSE_READLINK:
+               return "FUSE_READLINK";
+       case FUSE_SYMLINK:
+               return "FUSE_SYMLINK";
+       case FUSE_MKNOD:
+               return "FUSE_MKNOD";
+       case FUSE_MKDIR:
+               return "FUSE_MKDIR";
+       case FUSE_UNLINK:
+               return "FUSE_UNLINK";
+       case FUSE_RMDIR:
+               return "FUSE_RMDIR";
+       case FUSE_RENAME:
+               return "FUSE_RENAME";
+       case FUSE_LINK:
+               return "FUSE_LINK";
+       case FUSE_OPEN:
+               return "FUSE_OPEN";
+       case FUSE_READ:
+               return "FUSE_READ";
+       case FUSE_WRITE:
+               return "FUSE_WRITE";
+       case FUSE_STATFS:
+               return "FUSE_STATFS";
+       case FUSE_RELEASE:
+               return "FUSE_RELEASE";
+       case FUSE_FSYNC:
+               return "FUSE_FSYNC";
+       case FUSE_SETXATTR:
+               return "FUSE_SETXATTR";
+       case FUSE_GETXATTR:
+               return "FUSE_GETXATTR";
+       case FUSE_LISTXATTR:
+               return "FUSE_LISTXATTR";
+       case FUSE_REMOVEXATTR:
+               return "FUSE_REMOVEXATTR";
+       case FUSE_FLUSH:
+               return "FUSE_FLUSH";
+       case FUSE_INIT:
+               return "FUSE_INIT";
+       case FUSE_OPENDIR:
+               return "FUSE_OPENDIR";
+       case FUSE_READDIR:
+               return "FUSE_READDIR";
+       case FUSE_RELEASEDIR:
+               return "FUSE_RELEASEDIR";
+       case FUSE_FSYNCDIR:
+               return "FUSE_FSYNCDIR";
+       case FUSE_GETLK:
+               return "FUSE_GETLK";
+       case FUSE_SETLK:
+               return "FUSE_SETLK";
+       case FUSE_SETLKW:
+               return "FUSE_SETLKW";
+       case FUSE_ACCESS:
+               return "FUSE_ACCESS";
+       case FUSE_CREATE:
+               return "FUSE_CREATE";
+       case FUSE_INTERRUPT:
+               return "FUSE_INTERRUPT";
+       case FUSE_BMAP:
+               return "FUSE_BMAP";
+       case FUSE_DESTROY:
+               return "FUSE_DESTROY";
+       case FUSE_IOCTL:
+               return "FUSE_IOCTL";
+       case FUSE_POLL:
+               return "FUSE_POLL";
+       case FUSE_NOTIFY_REPLY:
+               return "FUSE_NOTIFY_REPLY";
+       case FUSE_BATCH_FORGET:
+               return "FUSE_BATCH_FORGET";
+       case FUSE_FALLOCATE:
+               return "FUSE_FALLOCATE";
+       case FUSE_READDIRPLUS:
+               return "FUSE_READDIRPLUS";
+       case FUSE_RENAME2:
+               return "FUSE_RENAME2";
+       case FUSE_LSEEK:
+               return "FUSE_LSEEK";
+       case FUSE_COPY_FILE_RANGE:
+               return "FUSE_COPY_FILE_RANGE";
+       default:
+               fuse_panic("Invalid opcode %d", op);
+               break;
+       }
+
+       return NULL;
+}
diff --git a/sys/vfs/fuse/fuse_vfsops.c b/sys/vfs/fuse/fuse_vfsops.c
new file mode 100644 (file)
index 0000000..1d039fe
--- /dev/null
@@ -0,0 +1,404 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+#include <sys/device.h>
+#include <sys/devfs.h>
+#include <sys/nlookup.h>
+#include <sys/file.h>
+#include <sys/sysctl.h>
+#include <sys/statvfs.h>
+#include <sys/priv.h>
+
+/* Debug switch tested by the FUSE code, e.g. fuse_hexdump(); off (0) by default. */
+int fuse_debug = 0;
+
+/* Root of the vfs.fuse sysctl subtree. */
+SYSCTL_NODE(_vfs, OID_AUTO, fuse, CTLFLAG_RD, 0, "FUSE");
+
+/* Read-only: the FUSE_KERNEL_VERSION / FUSE_KERNEL_MINOR_VERSION constants. */
+SYSCTL_INT(_vfs_fuse, OID_AUTO, version_major, CTLFLAG_RD, NULL,
+    FUSE_KERNEL_VERSION, "FUSE kernel version (major)");
+SYSCTL_INT(_vfs_fuse, OID_AUTO, version_minor, CTLFLAG_RD, NULL,
+    FUSE_KERNEL_MINOR_VERSION, "FUSE kernel version (minor)");
+
+/* Read-write: exposes fuse_debug as vfs.fuse.debug (no description string). */
+SYSCTL_INT(_vfs_fuse, OID_AUTO, debug, CTLFLAG_RW, &fuse_debug, 1, "");
+
+/*
+ * Compare the ABI version recorded in fmp (abi_major.abi_minor) against
+ * major.minor.
+ *
+ * Returns 0 if they are equal, 1 if the mount's version is newer and -1 if
+ * it is older.  The major number takes precedence over the minor number.
+ */
+int
+fuse_cmp_version(struct fuse_mount *fmp, uint32_t major, uint32_t minor)
+{
+       if (fmp->abi_major != major)
+               return (fmp->abi_major > major) ? 1 : -1;
+
+       if (fmp->abi_minor != minor)
+               return (fmp->abi_minor > minor) ? 1 : -1;
+
+       return 0;
+}
+
+/*
+ * Mark the mount dead and notify waiters: wakeup() on fmp and a KNOTE on
+ * fmp->kq.ki_note.
+ *
+ * Returns 0 if this call marked the mount dead, -1 if it was already dead
+ * (in which case nothing is done).
+ */
+int
+fuse_mount_kill(struct fuse_mount *fmp)
+{
+       if (fuse_test_dead(fmp))
+               return -1;
+
+       fuse_set_dead(fmp);
+       wakeup(fmp);
+       KNOTE(&fmp->kq.ki_note, 0);
+
+       return 0;
+}
+
+/*
+ * Drop one reference on fmp.  When refcount_release() reports the last
+ * reference gone, the two mutexes are uninitialized, the credential is
+ * released and fmp itself is freed.
+ *
+ * Returns 0 if fmp was freed, -1 if references remain.
+ */
+int
+fuse_mount_free(struct fuse_mount *fmp)
+{
+       if (!refcount_release(&fmp->refcnt)) {
+               fuse_dbg("fmp=%p %u refcnt left\n", fmp, fmp->refcnt);
+               return -1;
+       }
+
+       fuse_dbg("fmp=%p free\n", fmp);
+       mtx_uninit(&fmp->ipc_lock);
+       mtx_uninit(&fmp->mnt_lock);
+       crfree(fmp->cred);
+       kfree(fmp, M_TEMP);
+
+       return 0;
+}
+
+/*
+ * VFS mount entry point.
+ *
+ * mp    - mount being set up; mnt_stat, mnt_flag, mnt_kern_flag and
+ *         mnt_data are filled in here
+ * mntpt - user-space pointer to the mount point path
+ * data  - user-space pointer to a struct fuse_mount_info
+ * cred  - credentials of the caller
+ *
+ * Copies in the mount arguments, looks up and access-checks the vnode
+ * named by args.from, fetches the struct fuse_mount attached to the file
+ * descriptor args.fd, initializes it and performs the FUSE_INIT exchange.
+ *
+ * Returns 0 on success or an errno value.  MNT_UPDATE is rejected with
+ * EOPNOTSUPP, and a reply announcing an ABI older than 7.0 with
+ * EPROTONOSUPPORT.
+ */
+static int
+fuse_mount(struct mount *mp, char *mntpt, caddr_t data, struct ucred *cred)
+{
+       struct statfs *sbp = &mp->mnt_stat;
+       struct vnode *devvp;
+       struct file *file;
+       struct nlookupdata nd;
+       struct fuse_mount_info args;
+       struct fuse_mount *fmp;
+       struct fuse_ipc *fip;
+       struct fuse_init_in *fii;
+       struct fuse_init_out *fio;
+       char subtype[512];
+       int error;
+
+       if (mp->mnt_flag & MNT_UPDATE)
+               return EOPNOTSUPP;
+
+       error = copyin(data, &args, sizeof(args));
+       if (error)
+               return error;
+
+       /*
+        * args.from and args.subtype point into user space, so they are
+        * only ever read through copyinstr() and never dereferenced
+        * directly (no memcpy() from them).
+        */
+       memset(sbp->f_mntfromname, 0, sizeof(sbp->f_mntfromname));
+       error = copyinstr(args.from, sbp->f_mntfromname,
+           sizeof(sbp->f_mntfromname), NULL);
+       if (error)
+               return error;
+
+       memset(sbp->f_mntonname, 0, sizeof(sbp->f_mntonname));
+       error = copyinstr(mntpt, sbp->f_mntonname, sizeof(sbp->f_mntonname),
+           NULL);
+       if (error)
+               return error;
+
+       /* A non-empty subtype is appended to the fs type name as ".subtype". */
+       memset(subtype, 0, sizeof(subtype));
+       error = copyinstr(args.subtype, subtype, sizeof(subtype), NULL);
+       if (error)
+               return error;
+       if (strlen(subtype)) {
+               strlcat(sbp->f_fstypename, ".", sizeof(sbp->f_fstypename));
+               strlcat(sbp->f_fstypename, subtype, sizeof(sbp->f_fstypename));
+       }
+
+       /* Resolve the "from" path to a referenced vnode. */
+       error = nlookup_init(&nd, sbp->f_mntfromname, UIO_SYSSPACE, NLC_FOLLOW);
+       if (!error) {
+               error = nlookup(&nd);
+               if (!error)
+                       error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
+               nlookup_done(&nd);
+       }
+       if (error)
+               return error;
+       if (!devvp)
+               return ENODEV;
+
+       /* Caller needs read/write access to the device, or PRIV_ROOT. */
+       vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+       error = VOP_ACCESS(devvp, VREAD | VWRITE, cred);
+       if (error)
+               error = priv_check_cred(cred, PRIV_ROOT, 0);
+       if (error) {
+               vput(devvp);
+               return error;
+       }
+       vn_unlock(devvp);
+
+       /* The fuse_mount is the cdevpriv of the descriptor passed in args.fd. */
+       fuse_dbg("fd=%d\n", args.fd);
+       file = holdfp_fdp(curthread->td_proc->p_fd, args.fd, FREAD | FWRITE);
+       if (!file) {
+               vrele(devvp);
+               return EBADF;
+       }
+       error = devfs_get_cdevpriv(file, (void**)&fmp);
+       dropfp(curthread, args.fd, file);
+       if (error) {
+               vrele(devvp);
+               return error;
+       }
+       KKASSERT(fmp);
+
+       fmp->mp = mp;
+       fmp->dead = false;
+       mtx_init(&fmp->mnt_lock, "fuse_mnt_lock");
+       mtx_init(&fmp->ipc_lock, "fuse_ipc_lock");
+       TAILQ_INIT(&fmp->request_head);
+       TAILQ_INIT(&fmp->reply_head);
+       fmp->devvp = devvp;
+       fmp->cred = crhold(cred);
+       /* The mount takes its own reference in addition to an existing one. */
+       KKASSERT(fmp->refcnt > 0);
+       refcount_acquire(&fmp->refcnt);
+
+       mp->mnt_flag |= MNT_LOCAL;
+       mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
+       mp->mnt_data = (qaddr_t)fmp;
+
+       fuse_node_new(fmp, FUSE_ROOT_ID, VDIR, &fmp->rfnp);
+       KKASSERT(fmp->rfnp->ino == FUSE_ROOT_ID);
+
+       vfs_getnewfsid(mp);
+       vfs_add_vnodeops(mp, &fuse_vnode_vops, &mp->mnt_vn_norm_ops);
+       vfs_add_vnodeops(mp, &fuse_spec_vops, &mp->mnt_vn_spec_ops);
+
+       fip = fuse_ipc_get(fmp, sizeof(*fii));
+       fii = fuse_ipc_fill(fip, FUSE_INIT, FUSE_ROOT_ID, NULL);
+       fii->major = FUSE_KERNEL_VERSION;
+       fii->minor = FUSE_KERNEL_MINOR_VERSION;
+       fii->max_readahead = FUSE_BLKSIZE;
+       /* unused */
+       //fii->flags = ...;
+
+       /*
+        * NOTE(review): this error path and the EPROTONOSUPPORT path below
+        * return without dropping the reference taken by refcount_acquire()
+        * above or releasing fmp->cred and the root node - confirm whether
+        * something else cleans these up.
+        */
+       error = fuse_ipc_tx(fip);
+       if (error) {
+               vrele(devvp);
+               return error;
+       }
+
+       fio = fuse_out_data(fip);
+       fmp->abi_major = fio->major;
+       fmp->abi_minor = fio->minor;
+       fmp->max_write = fio->max_write;
+
+       if (fuse_cmp_version(fmp, 7, 0) < 0) {
+               fuse_ipc_put(fip);
+               vrele(devvp);
+               return EPROTONOSUPPORT;
+       }
+
+       /* unused */
+       //fio->max_readahead
+       //fio->flags
+       //fio->max_background
+       //fio->congestion_threshold
+       //fio->time_gran
+       //fio->max_pages
+       fuse_print("FUSE UABI %d.%d\n", fmp->abi_major, fmp->abi_minor);
+
+       fuse_ipc_put(fip);
+
+       VFS_STATFS(mp, &mp->mnt_stat, cred);
+
+       return 0;
+}
+
+/*
+ * VFS unmount entry point.
+ *
+ * Flushes the mount's vnodes (with FORCECLOSE when MNT_FORCE is given),
+ * sends FUSE_DESTROY and marks the mount dead if it is not dead already,
+ * closes and releases the device vnode, and drops the mount's reference on
+ * the fuse_mount.  All of this except the final release happens with
+ * fmp->mnt_lock held.
+ *
+ * Returns 0 on success or the error from vflush().
+ */
+static int
+fuse_unmount(struct mount *mp, int mntflags)
+{
+       struct fuse_mount *fmp = VFSTOFUSE(mp);
+       struct fuse_ipc *fip;
+       int flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
+       int error;
+
+       mtx_lock(&fmp->mnt_lock);
+
+       error = vflush(mp, 0, flags);
+       if (error) {
+               mtx_unlock(&fmp->mnt_lock);
+               fuse_dbg("vflush error=%d\n", error);
+               return error;
+       }
+
+       if (!fuse_test_dead(fmp)) {
+               fuse_dbg("not dead yet, destroying\n");
+               fip = fuse_ipc_get(fmp, 0);
+               fuse_ipc_fill(fip, FUSE_DESTROY, FUSE_ROOT_ID, NULL);
+               /* A failed FUSE_DESTROY does not stop the unmount. */
+               error = fuse_ipc_tx(fip);
+               if (error == 0)
+                       fuse_ipc_put(fip);
+               fuse_mount_kill(fmp);
+       }
+
+       /* The userspace fs will exit anyway after FUSE_DESTROY. */
+       vn_lock(fmp->devvp, LK_EXCLUSIVE | LK_RETRY);
+       VOP_CLOSE(fmp->devvp, FREAD | FWRITE, NULL);
+       vn_unlock(fmp->devvp);
+       vrele(fmp->devvp);
+
+       mtx_unlock(&fmp->mnt_lock);
+
+       fuse_mount_free(fmp);
+       mp->mnt_data = NULL;
+       mp->mnt_flag &= ~MNT_LOCAL;
+
+       fuse_dbg("unmount done\n");
+
+       return 0;
+}
+
+/*
+ * VFS root entry point: obtain the vnode for the mount's root node
+ * (fmp->rfnp) via fuse_node_vn() with LK_EXCLUSIVE, flag it VROOT and
+ * return it through vpp.  The root vnode must be a directory (asserted).
+ *
+ * Returns 0 on success or the error from fuse_node_vn().
+ */
+static int
+fuse_root(struct mount *mp, struct vnode **vpp)
+{
+       struct fuse_mount *fmp = VFSTOFUSE(mp);
+       struct vnode *vp;
+       int error;
+
+       KASSERT(fmp->rfnp, ("no root node"));
+       KKASSERT(fmp->rfnp->fmp);
+
+       error = fuse_node_vn(fmp->rfnp, LK_EXCLUSIVE, vpp);
+       if (error)
+               return error;
+
+       vp = *vpp;
+       vp->v_flag |= VROOT;
+       KKASSERT(vp->v_type == VDIR);
+
+       return 0;
+}
+
+/*
+ * VFS statfs entry point: send FUSE_STATFS for the root node and copy the
+ * reply into *sbp under fmp->mnt_lock.  f_bsize is taken from the reply's
+ * frsize, f_iosize is always FUSE_BLKSIZE, and the block and file counts
+ * are copied as reported.
+ *
+ * Returns 0 on success or the error from fuse_ipc_tx().
+ */
+static int
+fuse_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
+{
+       struct fuse_mount *fmp = VFSTOFUSE(mp);
+       struct fuse_statfs_out *fso;
+       struct fuse_ipc *fip;
+       int error;
+
+       fip = fuse_ipc_get(fmp, 0);
+       fuse_ipc_fill(fip, FUSE_STATFS, FUSE_ROOT_ID, cred);
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+       fso = fuse_out_data(fip);
+
+       mtx_lock(&fmp->mnt_lock);
+       /* block sizes */
+       sbp->f_bsize = fso->st.frsize;
+       sbp->f_iosize = FUSE_BLKSIZE;
+       /* block counts */
+       sbp->f_blocks = fso->st.blocks;
+       sbp->f_bfree = fso->st.bfree;
+       sbp->f_bavail = fso->st.bavail;
+       /* file counts */
+       sbp->f_files = fso->st.files;
+       sbp->f_ffree = fso->st.ffree;
+       mtx_unlock(&fmp->mnt_lock);
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VFS statvfs entry point: send FUSE_STATFS for the root node and copy the
+ * reply into *sbp under fmp->mnt_lock.
+ *
+ * Returns 0 on success or the error from fuse_ipc_tx().
+ */
+static int
+fuse_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
+{
+       struct fuse_mount *fmp = VFSTOFUSE(mp);
+       struct fuse_ipc *fip;
+       struct fuse_statfs_out *fso;
+       int error;
+
+       fip = fuse_ipc_get(fmp, 0);
+       fuse_ipc_fill(fip, FUSE_STATFS, FUSE_ROOT_ID, cred);
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       fso = fuse_out_data(fip);
+
+       mtx_lock(&fmp->mnt_lock);
+       /*
+        * statvfs counts f_blocks in units of f_frsize, so f_frsize must be
+        * the unit the reply's block counts use (st.frsize, the same value
+        * fuse_statfs() reports as the statfs block size).  f_bsize is the
+        * I/O block size, for which fuse_statfs() uses FUSE_BLKSIZE.
+        */
+       sbp->f_bsize = FUSE_BLKSIZE;
+       sbp->f_frsize = fso->st.frsize;
+       sbp->f_blocks = fso->st.blocks;
+       sbp->f_bfree = fso->st.bfree;
+       sbp->f_bavail = fso->st.bavail;
+       sbp->f_files = fso->st.files;
+       sbp->f_ffree = fso->st.ffree;
+       mtx_unlock(&fmp->mnt_lock);
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VFS init hook: set up the node, ipc and file subsystems and then the
+ * device.  If device setup fails, the three subsystems are torn down again
+ * in reverse order and the error is returned.  On success the compiled-in
+ * FUSE ABI version is printed and 0 is returned.
+ */
+static int
+fuse_init(struct vfsconf *vfsp)
+{
+       int error;
+
+       fuse_node_init();
+       fuse_ipc_init();
+       fuse_file_init();
+
+       error = fuse_device_init();
+       if (!error) {
+               fuse_print("FUSE ABI %d.%d\n", FUSE_KERNEL_VERSION,
+                   FUSE_KERNEL_MINOR_VERSION);
+               return 0;
+       }
+
+       /* Undo the initializations above, last one first. */
+       fuse_file_cleanup();
+       fuse_ipc_cleanup();
+       fuse_node_cleanup();
+
+       return error;
+}
+
+/*
+ * VFS uninit hook: tear down the file, ipc and node subsystems and then
+ * the device.  Always returns 0.
+ */
+static int
+fuse_uninit(struct vfsconf *vfsp)
+{
+       fuse_file_cleanup();
+       fuse_ipc_cleanup();
+       fuse_node_cleanup();
+
+       fuse_device_cleanup();
+
+       return 0;
+}
+
+/*
+ * VFS operations vector for the fuse file system; each member points at
+ * the function of the same purpose defined in this file.
+ */
+static struct vfsops fuse_vfsops = {
+       .vfs_init = fuse_init,
+       .vfs_uninit = fuse_uninit,
+       .vfs_mount = fuse_mount,
+       .vfs_unmount = fuse_unmount,
+       .vfs_root = fuse_root,
+       .vfs_statfs = fuse_statfs,
+       .vfs_statvfs = fuse_statvfs,
+};
+
+/* Register the vector under the name "fuse", flagged synthetic and MP-safe. */
+VFS_SET(fuse_vfsops, fuse, VFCF_SYNTHETIC | VFCF_MPSAFE);
+MODULE_VERSION(fuse, 1);
diff --git a/sys/vfs/fuse/fuse_vnops.c b/sys/vfs/fuse/fuse_vnops.c
new file mode 100644 (file)
index 0000000..ed419bd
--- /dev/null
@@ -0,0 +1,1624 @@
+/*-
+ * Copyright (c) 2019 Tomohiro Kusumi <tkusumi@netbsd.org>
+ * Copyright (c) 2019 The DragonFly Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "fuse.h"
+
+#include <sys/fcntl.h>
+#include <sys/dirent.h>
+#include <sys/namei.h>
+#include <sys/uio.h>
+#include <sys/mountctl.h>
+
+/*
+ * Rebuild the cached attributes of a node (fnp->attr) from a struct
+ * fuse_attr received from userspace.
+ *
+ * The vattr is reset with vattr_null() and then filled field by field.  A
+ * link count of 0 in fat is stored as 1.  The resulting type must match
+ * fnp->type (asserted).  fnp->nlink is brought in line with the new link
+ * count, and if the vnode has a VM object and the size differs from
+ * fnp->size, the node is resized through fuse_node_truncate().
+ *
+ * Returns 0, or the error from fuse_node_truncate().
+ */
+static int
+fuse_set_attr(struct fuse_node *fnp, struct fuse_attr *fat)
+{
+       struct vattr *vap = &fnp->attr;
+
+       vattr_null(vap);
+
+       /* identity */
+       vap->va_type = IFTOVT(fat->mode);
+       vap->va_mode = fat->mode & ~S_IFMT;
+       vap->va_uid = fat->uid;
+       vap->va_gid = fat->gid;
+       vap->va_fsid = fnp->fmp->mp->mnt_stat.f_fsid.val[0];
+       vap->va_fileid = fat->ino;
+       /* XXX .fuse_hidden* has 0 link */
+       vap->va_nlink = fat->nlink ? fat->nlink : 1;
+
+       /* size */
+       vap->va_size = fat->size;
+       vap->va_bytes = fat->blocks * S_BLKSIZE;
+       vap->va_blocksize = FUSE_BLKSIZE;
+
+       /* timestamps */
+       vap->va_atime.tv_sec = fat->atime;
+       vap->va_atime.tv_nsec = fat->atimensec;
+       vap->va_mtime.tv_sec = fat->mtime;
+       vap->va_mtime.tv_nsec = fat->mtimensec;
+       vap->va_ctime.tv_sec = fat->ctime;
+       vap->va_ctime.tv_nsec = fat->ctimensec;
+
+       /* not provided by struct fuse_attr */
+       vap->va_rmajor = VNOVAL;
+       vap->va_rminor = VNOVAL;
+       vap->va_gen = VNOVAL;
+       vap->va_flags = 0;
+       vap->va_vaflags = 0;
+
+       KKASSERT(vap->va_type == fnp->type);
+
+       if (fnp->nlink != vap->va_nlink) {
+               fuse_dbg("ino=%ju update nlink %d -> %ju\n",
+                   fnp->ino, fnp->nlink, vap->va_nlink);
+               fnp->nlink = vap->va_nlink;
+       }
+
+       if (!fnp->vp->v_object || fnp->size == vap->va_size)
+               return 0;
+
+       return fuse_node_truncate(fnp, fnp->size, vap->va_size);
+}
+
+/*
+ * VOP_ACCESS: ask the userspace file system whether the requested access
+ * is allowed by sending FUSE_ACCESS.
+ *
+ * Access is granted without asking when the mount is dead or FUSE_ACCESS
+ * is flagged as not implemented.  Write access to a directory, symlink or
+ * regular file on a read-only mount fails with EROFS, and an unexpected
+ * vnode type with EINVAL.  An ENOSYS reply, or ENOTCONN on the root vnode,
+ * is treated as success.
+ */
+static int
+fuse_vop_access(struct vop_access_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       mode_t mode = ap->a_mode;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_access_in *fai;
+       struct fuse_ipc *fip;
+       uint32_t mask = F_OK;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return 0;
+
+       if (fuse_test_nosys(fmp, FUSE_ACCESS))
+               return 0;
+
+       switch (vp->v_type) {
+       case VBLK:
+       case VCHR:
+       case VSOCK:
+       case VFIFO:
+               break;
+       case VDIR:
+       case VLNK:
+       case VREG:
+               if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY))
+                       return EROFS;
+               break;
+       default:
+               return EINVAL;
+       }
+
+       /* Translate the VREAD/VWRITE/VEXEC bits into an access(2) mask. */
+       if (mode & VREAD)
+               mask |= R_OK;
+       if (mode & VWRITE)
+               mask |= W_OK;
+       if (mode & VEXEC)
+               mask |= X_OK;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fai));
+       fai = fuse_ipc_fill(fip, FUSE_ACCESS, VTOI(vp)->ino, ap->a_cred);
+       fai->mask = mask;
+
+       error = fuse_ipc_tx(fip);
+       if (error == 0) {
+               fuse_ipc_put(fip);
+               return 0;
+       }
+
+       if (error == ENOSYS)
+               return 0;
+       if (error == ENOTCONN && (vp->v_flag & VROOT))
+               return 0;
+
+       return error;
+}
+
+static int
+fuse_vop_open(struct vop_open_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       struct fuse_ipc *fip;
+       struct fuse_open_in *foi;
+       struct fuse_open_out *foo;
+       int error, op;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_OPEN))
+               return EOPNOTSUPP;
+
+       if (vp->v_type == VDIR)
+               op = FUSE_OPENDIR;
+       else
+               op = FUSE_OPEN;
+
+       fip = fuse_ipc_get(fmp, sizeof(*foi));
+       foi = fuse_ipc_fill(fip, op, fnp->ino, ap->a_cred);
+       foi->flags = OFLAGS(ap->a_mode);
+       fuse_dbg("flags=%X\n", foi->flags);
+       if (foi->flags & O_CREAT) {
+               fuse_dbg("drop O_CREAT\n");
+               foi->flags &= ~O_CREAT;
+       }
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       /* XXX unused */
+       foo = fuse_out_data(fip);
+       if (foo->open_flags & FOPEN_DIRECT_IO)
+               ;
+       else if (foo->open_flags & FOPEN_KEEP_CACHE)
+               ;
+       else if (foo->open_flags & FOPEN_NONSEEKABLE)
+               ;
+       else if (foo->open_flags & FOPEN_CACHE_DIR)
+               ;
+
+       fnp->closed = false;
+       fuse_get_nfh(VTOI(vp), foo->fh);
+       if (ap->a_fp) {
+#if 1
+               fuse_get_fh(ap->a_fp, foo->fh);
+#else
+               /* see #if0'd code in fuse_vop_setattr() */
+               if (!ap->a_fp->private_data)
+                       fuse_get_fh(ap->a_fp, foo->fh);
+               else {
+                       uint64_t *fhp = ap->a_fp->private_data;
+                       *fhp = foo->fh;
+               }
+#endif
+       }
+
+       fuse_ipc_put(fip);
+
+       return vop_stdopen(ap);
+}
+
+/*
+ * VOP_CLOSE: send FUSE_RELEASEDIR (directories) or FUSE_RELEASE
+ * (everything else) for the file handle being closed, then drop the
+ * handle from the node and, when a struct file is supplied, from that
+ * file.  The handle of the struct file takes precedence over the node's.
+ * Finishes with vop_stdclose().
+ *
+ * Returns 0 if the mount is dead, EOPNOTSUPP if either release opcode is
+ * flagged as not implemented, the error from fuse_ipc_tx(), or the result
+ * of vop_stdclose().
+ */
+static int
+fuse_vop_close(struct vop_close_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       struct fuse_release_in *fri;
+       struct fuse_ipc *fip;
+       int error, op;
+
+       if (fuse_test_dead(fmp))
+               return 0;
+
+       if (fuse_test_nosys(fmp, FUSE_RELEASE) ||
+           fuse_test_nosys(fmp, FUSE_RELEASEDIR))
+               return EOPNOTSUPP;
+
+       op = (vp->v_type == VDIR) ? FUSE_RELEASEDIR : FUSE_RELEASE;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fri));
+       fri = fuse_ipc_fill(fip, op, fnp->ino, NULL);
+       /* unused */
+       //fri->flags = ...;
+       //fri->release_flags = ...;
+       //fri->lock_owner = ...;
+       fri->fh = fuse_nfh(fnp);
+       if (ap->a_fp)
+               fri->fh = fuse_fh(ap->a_fp);
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+       fuse_ipc_put(fip);
+
+       fnp->closed = true;
+       fuse_put_nfh(fnp);
+       if (ap->a_fp)
+               fuse_put_fh(ap->a_fp);
+
+       return vop_stdclose(ap);
+}
+
+/*
+ * VOP_FSYNC: send FUSE_FSYNCDIR (directories) or FUSE_FSYNC (everything
+ * else) with fsync_flags set to 1 (datasync), then take the vnode off the
+ * syncer, run vfsync() on it and clear its dirty state.  The handle of the
+ * struct file, when supplied, takes precedence over the node's.
+ *
+ * Returns 0 on success, and also when the mount is dead or FUSE_FSYNC is
+ * flagged as not implemented; otherwise the error from fuse_ipc_tx().
+ */
+static int
+fuse_vop_fsync(struct vop_fsync_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_fsync_in *fsi;
+       struct fuse_ipc *fip;
+       int error, op;
+
+       if (fuse_test_dead(fmp))
+               return 0;
+
+       if (fuse_test_nosys(fmp, FUSE_FSYNC))
+               return 0;
+
+       op = (vp->v_type == VDIR) ? FUSE_FSYNCDIR : FUSE_FSYNC;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fsi));
+       fsi = fuse_ipc_fill(fip, op, VTOI(vp)->ino, NULL);
+       fsi->fh = fuse_nfh(VTOI(vp));
+       if (ap->a_fp)
+               fsi->fh = fuse_fh(ap->a_fp);
+       fsi->fsync_flags = 1; /* datasync */
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+       fuse_ipc_put(fip);
+
+       vn_syncer_remove(vp, 1);
+       vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
+       vclrisdirty(vp);
+
+       return 0;
+}
+
+static int
+fuse_vop_getattr(struct vop_getattr_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct vattr *vap = ap->a_vap;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       struct fuse_ipc *fip;
+       struct fuse_getattr_in *fgi;
+       struct fuse_attr_out *fao;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return 0;
+
+       if (fuse_test_nosys(fmp, FUSE_GETATTR))
+               return 0;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fgi));
+       fgi = fuse_ipc_fill(fip, FUSE_GETATTR, fnp->ino, NULL);
+#if 0
+       /* this may be called before open when fh is 0 */
+       fgi->getattr_flags |= FUSE_GETATTR_FH;
+       fgi->fh = fuse_nfh(fnp);
+       if (ap->a_fp)
+               fgi->fh = fuse_fh(ap->a_fp);
+#endif
+       error = fuse_ipc_tx(fip);
+       if (error) {
+               if (error == ENOSYS)
+                       error = 0;
+               if (error == ENOTCONN && (vp->v_flag & VROOT)) {
+                       memset(vap, 0, sizeof(*vap));
+                       vap->va_type = vp->v_type;
+                       error = 0;
+               }
+               return error;
+       }
+
+       fao = fuse_out_data(fip);
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &fao->attr);
+       memcpy(vap, &fnp->attr, sizeof(*vap));
+       /* unused */
+       //fao->attr_valid;
+       //fao->attr_valid_nsec;
+       mtx_unlock(&fnp->node_lock);
+
+       fuse_ipc_put(fip);
+
+       if (vap->va_type != vp->v_type)
+               return EINVAL;
+
+       return 0;
+}
+
+static int
+fuse_vop_setattr(struct vop_setattr_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct vattr *vap = ap->a_vap;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *fnp = VTOI(vp);
+       struct fuse_ipc *fip;
+       struct fuse_setattr_in *fsi, arg;
+       struct fuse_attr_out *fao;
+       int kflags = 0;
+       int error = 0;
+
+       if (fuse_test_dead(fmp))
+               return 0;
+
+       if (fuse_test_nosys(fmp, FUSE_SETATTR))
+               return 0;
+
+       if (vp->v_mount->mnt_flag & MNT_RDONLY)
+               return EROFS;
+
+       memset(&arg, 0, sizeof(arg));
+       mtx_lock(&fnp->node_lock);
+
+       if (!error && (vap->va_flags != VNOVAL)) {
+               mtx_unlock(&fnp->node_lock);
+               kflags |= NOTE_ATTRIB;
+               return EOPNOTSUPP; /* XXX */
+       }
+
+       if (!error && (vap->va_size != VNOVAL)) {
+               if (vp->v_type == VDIR) {
+                       mtx_unlock(&fnp->node_lock);
+                       return EISDIR;
+               }
+               if (vp->v_type == VREG &&
+                   (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+                       mtx_unlock(&fnp->node_lock);
+                       return EROFS;
+               }
+               arg.size = vap->va_size;
+               arg.valid |= FATTR_SIZE;
+               if (vap->va_size > fnp->size)
+                       kflags |= NOTE_WRITE | NOTE_EXTEND;
+               else
+                       kflags |= NOTE_WRITE;
+       }
+
+       if (!error && (vap->va_uid != (uid_t)VNOVAL ||
+           vap->va_gid != (gid_t)VNOVAL)) {
+               mode_t mode;
+               error = vop_helper_chown(vp, vap->va_uid, vap->va_gid,
+                   ap->a_cred, &arg.uid, &arg.gid, &mode);
+               arg.valid |= FATTR_UID;
+               arg.valid |= FATTR_GID;
+               kflags |= NOTE_ATTRIB;
+       }
+
+       if (!error && (vap->va_mode != (mode_t)VNOVAL)) {
+               error = vop_helper_chmod(vp, vap->va_mode, ap->a_cred,
+                   vap->va_uid, vap->va_gid, (mode_t*)&arg.mode);
+               arg.valid |= FATTR_MODE;
+               kflags |= NOTE_ATTRIB;
+       }
+
+       if (!error && (vap->va_atime.tv_sec != VNOVAL &&
+           vap->va_atime.tv_nsec != VNOVAL)) {
+               arg.atime = vap->va_atime.tv_sec;
+               arg.atimensec = vap->va_atime.tv_nsec;
+               arg.valid |= FATTR_ATIME;
+               kflags |= NOTE_ATTRIB;
+       }
+
+       if (!error && (vap->va_mtime.tv_sec != VNOVAL &&
+           vap->va_mtime.tv_nsec != VNOVAL)) {
+               arg.mtime = vap->va_mtime.tv_sec;
+               arg.mtimensec = vap->va_mtime.tv_nsec;
+               arg.valid |= FATTR_MTIME;
+               kflags |= NOTE_ATTRIB;
+       }
+
+       if (!error && (vap->va_ctime.tv_sec != VNOVAL &&
+           vap->va_ctime.tv_nsec != VNOVAL)) {
+               arg.ctime = vap->va_ctime.tv_sec;
+               arg.ctimensec = vap->va_ctime.tv_nsec;
+               arg.valid |= FATTR_CTIME;
+               kflags |= NOTE_ATTRIB;
+       }
+
+       mtx_unlock(&fnp->node_lock);
+
+       if (error)
+               return error;
+       if (!arg.valid)
+               return 0;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fsi));
+       fsi = fuse_ipc_fill(fip, FUSE_SETATTR, fnp->ino, ap->a_cred);
+       memcpy(fsi, &arg, sizeof(arg));
+#if 0
+       fsi->valid |= FATTR_FH;
+       fsi->fh = fuse_nfh(fnp);
+       if (ap->a_fp) {
+               /* vn_open() may call VOP_SETATTR_FP() prior to VOP_OPEN(). */
+               if (!ap->a_fp->private_data)
+                       fuse_get_fh(ap->a_fp, 0); /* XXX */
+               fsi->fh = fuse_fh(ap->a_fp);
+       }
+#endif
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       fao = fuse_out_data(fip);
+       if (IFTOVT(fao->attr.mode) != vp->v_type) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &fao->attr);
+       /* unused */
+       //fao->attr_valid;
+       //fao->attr_valid_nsec;
+       mtx_unlock(&fnp->node_lock);
+
+       fuse_ipc_put(fip);
+       fuse_knote(vp, kflags);
+
+       return 0;
+}
+
+/*
+ * VOP_NRESOLVE for FUSE: look up one name in a directory.
+ *
+ * Sends FUSE_LOOKUP with the namecache entry's name.  On ENOENT the
+ * namecache entry is resolved to NULL via cache_setvp() and ENOENT is
+ * returned.  On success a node/vnode is obtained from fuse_alloc_node()
+ * using the vnode type derived from the reply's mode, and the namecache
+ * entry is resolved to that vnode.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_LOOKUP) is true, the error from fuse_ipc_tx() or
+ * fuse_alloc_node(), or 0.
+ */
+static int
+fuse_vop_nresolve(struct vop_nresolve_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_entry_out *feo;
+       struct fuse_ipc *fip;
+       char *name, namecopy[1024];
+       enum vtype vtyp;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_LOOKUP))
+               return EOPNOTSUPP;
+
+       fip = fuse_ipc_get(fmp, ncp->nc_nlen + 1);
+       name = fuse_ipc_fill(fip, FUSE_LOOKUP, dfnp->ino, ap->a_cred);
+
+       memcpy(name, ncp->nc_name, ncp->nc_nlen);
+       name[ncp->nc_nlen] = '\0';
+       /* Private copy of the name for the debug messages on the error paths. */
+       strlcpy(namecopy, name, sizeof(namecopy));
+
+       error = fuse_ipc_tx(fip);
+       if (error != 0) {
+               if (error == ENOENT) {
+                       cache_setvp(ap->a_nch, NULL);
+                       fuse_dbg("lookup \"%s\" ENOENT\n", namecopy);
+               } else {
+                       fuse_dbg("lookup \"%s\" error=%d\n", namecopy, error);
+               }
+               return error;
+       }
+
+       feo = fuse_out_data(fip);
+       fuse_dbg("lookup \"%s\" ino=%ju/%ju\n", name, feo->nodeid,
+           feo->attr.ino);
+
+       /* Map the file-type bits of the reply's mode to a vnode type. */
+       switch (feo->attr.mode & S_IFMT) {
+       case S_IFREG:
+               vtyp = VREG;
+               break;
+       case S_IFDIR:
+               vtyp = VDIR;
+               break;
+       case S_IFBLK:
+               vtyp = VBLK;
+               break;
+       case S_IFCHR:
+               vtyp = VCHR;
+               break;
+       case S_IFLNK:
+               vtyp = VLNK;
+               break;
+       case S_IFSOCK:
+               vtyp = VSOCK;
+               break;
+       case S_IFIFO:
+               vtyp = VFIFO;
+               break;
+       default:
+               vtyp = VBAD;
+               break;
+       }
+
+       error = fuse_alloc_node(dfnp, feo->nodeid, name, strlen(name), vtyp,
+           &vp);
+       if (error != 0) {
+               fuse_ipc_put(fip);
+               return error;
+       }
+       KKASSERT(vp);
+       KKASSERT(vn_islocked(vp));
+
+       vn_unlock(vp);
+       cache_setvp(ap->a_nch, vp);
+       vrele(vp);
+
+       /*
+        * Unused reply fields: generation, entry_valid, attr_valid,
+        * entry_valid_nsec, attr_valid_nsec.
+        */
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_NLINK for FUSE: create a hard link to vp named by the namecache
+ * entry inside directory dvp.
+ *
+ * Sends FUSE_LINK (fuse_link_in followed by the NUL-terminated new name).
+ * On success a new directory entry for the node is attached to the parent
+ * node, the node's attributes are refreshed from the reply, the namecache
+ * entry is re-resolved to vp and knotes are posted on both vnodes.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_LINK) is true, EPERM for directories, EXDEV for a
+ * cross-mount link, EMLINK when nlink has reached LINK_MAX, EINVAL when
+ * the reply's file type does not match vp, the error from fuse_ipc_tx(),
+ * or 0.
+ */
+static int
+fuse_vop_nlink(struct vop_nlink_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp = ap->a_vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp = VTOI(vp);
+       struct fuse_dent *fep;
+       struct fuse_ipc *fip;
+       struct fuse_link_in *fli;
+       struct fuse_entry_out *feo;
+       char *p;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_LINK))
+               return EOPNOTSUPP;
+
+       if (vp->v_type == VDIR)
+               return EPERM;
+       if (dvp->v_mount != vp->v_mount)
+               return EXDEV;
+       if (fnp->nlink >= LINK_MAX)
+               return EMLINK;
+
+       /*
+        * Size the request by the structure, not by the pointer to it:
+        * sizeof(fli) only matches sizeof(*fli) by coincidence of the
+        * pointer width.
+        */
+       fip = fuse_ipc_get(fmp, sizeof(*fli) + ncp->nc_nlen + 1);
+       fli = fuse_ipc_fill(fip, FUSE_LINK, dfnp->ino, ap->a_cred);
+       fli->oldnodeid = fnp->ino;
+
+       /* The new name follows the fixed-size header in the request. */
+       p = (char*)(fli + 1);
+       memcpy(p, ncp->nc_name, ncp->nc_nlen);
+       p[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       feo = fuse_out_data(fip);
+       if (IFTOVT(feo->attr.mode) != vp->v_type) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+
+       /* Parent lock first, then the node being linked. */
+       mtx_lock(&dfnp->node_lock);
+       mtx_lock(&fnp->node_lock);
+       fuse_dent_new(fnp, p, strlen(p), &fep);
+       fuse_dent_attach(dfnp, fep);
+       fuse_set_attr(fnp, &feo->attr);
+       mtx_unlock(&fnp->node_lock);
+       mtx_unlock(&dfnp->node_lock);
+
+       cache_setunresolved(ap->a_nch);
+       cache_setvp(ap->a_nch, vp);
+       fuse_knote(dvp, NOTE_WRITE);
+       fuse_knote(vp, NOTE_LINK);
+
+       /* unused */
+       //feo->nodeid;
+       //feo->generation;
+       //feo->entry_valid;
+       //feo->attr_valid;
+       //feo->entry_valid_nsec;
+       //feo->attr_valid_nsec;
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_NCREATE for FUSE: create a file in directory dvp.
+ *
+ * Sends FUSE_CREATE (fuse_create_in followed by the NUL-terminated name).
+ * The reply must describe a regular file or a socket; a VREG node/vnode is
+ * then obtained from fuse_alloc_node(), its attributes are set from the
+ * reply, the namecache entry is resolved to it and it is handed back
+ * through *ap->a_vpp.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_CREATE) is true, EINVAL for an unexpected file type
+ * in the reply, the error from fuse_ipc_tx() or fuse_alloc_node(), or 0.
+ */
+static int
+fuse_vop_ncreate(struct vop_ncreate_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_create_in *req;
+       struct fuse_entry_out *feo;
+       struct fuse_ipc *fip;
+       enum vtype vtyp;
+       char *name;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_CREATE))
+               return EOPNOTSUPP;
+
+       fip = fuse_ipc_get(fmp, sizeof(*req) + ncp->nc_nlen + 1);
+       req = fuse_ipc_fill(fip, FUSE_CREATE, dfnp->ino, ap->a_cred);
+       req->flags = OFLAGS(ap->a_vap->va_fuseflags);
+       req->mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+       /* req->umask is left unset. */
+       fuse_dbg("flags=%X mode=%X\n", req->flags, req->mode);
+
+       /* The name follows the fixed-size header in the request. */
+       name = (char*)(req + 1);
+       memcpy(name, ncp->nc_name, ncp->nc_nlen);
+       name[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error != 0)
+               return error;
+
+       /*
+        * The reply is a fuse_entry_out followed by a fuse_open_out; the
+        * latter (open_flags) is not used here.
+        */
+       feo = fuse_out_data(fip);
+       vtyp = IFTOVT(feo->attr.mode);
+       if (vtyp != VREG && vtyp != VSOCK) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+
+       error = fuse_alloc_node(dfnp, feo->nodeid, name, strlen(name), VREG,
+           &vp);
+       if (error != 0) {
+               fuse_ipc_put(fip);
+               return error;
+       }
+       KKASSERT(vp);
+       KKASSERT(vn_islocked(vp));
+
+       fnp = VTOI(vp);
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &feo->attr);
+       mtx_unlock(&fnp->node_lock);
+
+       cache_setunresolved(ap->a_nch);
+       cache_setvp(ap->a_nch, vp);
+       *(ap->a_vpp) = vp;
+       fuse_knote(dvp, NOTE_WRITE);
+
+       /*
+        * Unused reply fields: generation, entry_valid, attr_valid,
+        * entry_valid_nsec, attr_valid_nsec.
+        */
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_NMKNOD for FUSE: create a device node or FIFO in directory dvp.
+ *
+ * Sends FUSE_MKNOD (fuse_mknod_in followed by the NUL-terminated name).
+ * The reply must describe a block device, character device or FIFO.  The
+ * node/vnode is then obtained from fuse_alloc_node() using the caller's
+ * requested va_type, its attributes are set from the reply, the namecache
+ * entry is resolved to it and it is handed back through *ap->a_vpp.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_MKNOD) is true, EINVAL for an unexpected file type
+ * in the reply, the error from fuse_ipc_tx() or fuse_alloc_node(), or 0.
+ */
+static int
+fuse_vop_nmknod(struct vop_nmknod_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_mknod_in *req;
+       struct fuse_entry_out *feo;
+       struct fuse_ipc *fip;
+       enum vtype vtyp;
+       char *name;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_MKNOD))
+               return EOPNOTSUPP;
+
+       fip = fuse_ipc_get(fmp, sizeof(*req) + ncp->nc_nlen + 1);
+       req = fuse_ipc_fill(fip, FUSE_MKNOD, dfnp->ino, ap->a_cred);
+       req->mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+       /* req->rdev and req->umask are left unset. */
+
+       /* The name follows the fixed-size header in the request. */
+       name = (char*)(req + 1);
+       memcpy(name, ncp->nc_name, ncp->nc_nlen);
+       name[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error != 0)
+               return error;
+
+       feo = fuse_out_data(fip);
+       vtyp = IFTOVT(feo->attr.mode);
+       if (vtyp != VBLK && vtyp != VCHR && vtyp != VFIFO) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+
+       error = fuse_alloc_node(dfnp, feo->nodeid, name, strlen(name),
+           ap->a_vap->va_type, &vp);
+       if (error != 0) {
+               fuse_ipc_put(fip);
+               return error;
+       }
+       KKASSERT(vp);
+       KKASSERT(vn_islocked(vp));
+
+       fnp = VTOI(vp);
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &feo->attr);
+       mtx_unlock(&fnp->node_lock);
+
+       cache_setunresolved(ap->a_nch);
+       cache_setvp(ap->a_nch, vp);
+       *(ap->a_vpp) = vp;
+       fuse_knote(dvp, NOTE_WRITE);
+
+       /*
+        * Unused reply fields: generation, entry_valid, attr_valid,
+        * entry_valid_nsec, attr_valid_nsec.
+        */
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_NREMOVE for FUSE: unlink the name held by the namecache entry from
+ * directory dvp.
+ *
+ * The target vnode is obtained with cache_vget() (which must succeed),
+ * FUSE_UNLINK is sent with the NUL-terminated name, and on success the
+ * matching directory entry is detached from the parent node and freed,
+ * the namecache entry is unlinked and knotes are posted.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_UNLINK) is true, the error from fuse_ipc_tx(),
+ * ENOENT when the parent node has no such directory entry, or 0.
+ */
+static int
+fuse_vop_nremove(struct vop_nremove_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_dent *fep;
+       struct fuse_ipc *fip;
+       char *p;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_UNLINK))
+               return EOPNOTSUPP;
+
+       error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
+       /* Check the result before touching vp, which is only valid on success. */
+       KKASSERT(!error); /* from tmpfs */
+       KKASSERT(vp->v_mount == dvp->v_mount);
+       vn_unlock(vp);
+
+       fip = fuse_ipc_get(fmp, ncp->nc_nlen + 1);
+       p = fuse_ipc_fill(fip, FUSE_UNLINK, dfnp->ino, ap->a_cred);
+
+       memcpy(p, ncp->nc_name, ncp->nc_nlen);
+       p[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error) {
+               vrele(vp);
+               return error;
+       }
+
+       /* Parent lock first, then the node being removed. */
+       fnp = VTOI(vp);
+       mtx_lock(&dfnp->node_lock);
+       mtx_lock(&fnp->node_lock);
+       error = fuse_dent_find(dfnp, p, strlen(p), &fep);
+       if (error == ENOENT) {
+               mtx_unlock(&fnp->node_lock);
+               mtx_unlock(&dfnp->node_lock);
+               fuse_ipc_put(fip);
+               vrele(vp);
+               return error;
+       }
+       fuse_dent_detach(dfnp, fep);
+       fuse_dent_free(fep);
+       mtx_unlock(&fnp->node_lock);
+       mtx_unlock(&dfnp->node_lock);
+
+       cache_unlink(ap->a_nch);
+       fuse_knote(dvp, NOTE_WRITE);
+       fuse_knote(vp, NOTE_DELETE);
+
+       fuse_ipc_put(fip);
+       vrele(vp);
+
+       return 0;
+}
+
+/*
+ * VOP_NMKDIR for FUSE: create a directory in dvp.
+ *
+ * Sends FUSE_MKDIR (fuse_mkdir_in followed by the NUL-terminated name).
+ * The reply must describe a directory; a VDIR node/vnode is then obtained
+ * from fuse_alloc_node(), its attributes are set from the reply, the
+ * namecache entry is resolved to it and it is handed back through
+ * *ap->a_vpp.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_MKDIR) is true, EINVAL when the reply is not a
+ * directory, the error from fuse_ipc_tx() or fuse_alloc_node(), or 0.
+ */
+static int
+fuse_vop_nmkdir(struct vop_nmkdir_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_mkdir_in *req;
+       struct fuse_entry_out *feo;
+       struct fuse_ipc *fip;
+       char *name;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_MKDIR))
+               return EOPNOTSUPP;
+
+       fip = fuse_ipc_get(fmp, sizeof(*req) + ncp->nc_nlen + 1);
+       req = fuse_ipc_fill(fip, FUSE_MKDIR, dfnp->ino, ap->a_cred);
+       req->mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+
+       /* The name follows the fixed-size header in the request. */
+       name = (char*)(req + 1);
+       memcpy(name, ncp->nc_name, ncp->nc_nlen);
+       name[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error != 0)
+               return error;
+
+       feo = fuse_out_data(fip);
+       if (IFTOVT(feo->attr.mode) != VDIR) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+
+       error = fuse_alloc_node(dfnp, feo->nodeid, name, strlen(name), VDIR,
+           &vp);
+       if (error != 0) {
+               fuse_ipc_put(fip);
+               return error;
+       }
+       KKASSERT(vp);
+       KKASSERT(vn_islocked(vp));
+
+       fnp = VTOI(vp);
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &feo->attr);
+       mtx_unlock(&fnp->node_lock);
+
+       cache_setunresolved(ap->a_nch);
+       cache_setvp(ap->a_nch, vp);
+       *(ap->a_vpp) = vp;
+       fuse_knote(dvp, NOTE_WRITE | NOTE_LINK);
+
+       /*
+        * Unused reply fields: generation, entry_valid, attr_valid,
+        * entry_valid_nsec, attr_valid_nsec.
+        */
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_NRMDIR for FUSE: remove the directory named by the namecache entry
+ * from directory dvp.
+ *
+ * The target vnode is obtained with cache_vget() (which must succeed),
+ * FUSE_RMDIR is sent with the NUL-terminated name, and on success the
+ * matching directory entry is detached from the parent node and freed,
+ * the namecache entry is unlinked and a knote is posted on dvp.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_RMDIR) is true, the error from fuse_ipc_tx(),
+ * ENOENT when the parent node has no such directory entry, or 0.
+ */
+static int
+fuse_vop_nrmdir(struct vop_nrmdir_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_dent *fep;
+       struct fuse_ipc *fip;
+       char *p;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_RMDIR))
+               return EOPNOTSUPP;
+
+       error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
+       /* Check the result before touching vp, which is only valid on success. */
+       KKASSERT(!error); /* from tmpfs */
+       KKASSERT(vp->v_mount == dvp->v_mount);
+       vn_unlock(vp);
+
+       fip = fuse_ipc_get(fmp, ncp->nc_nlen + 1);
+       p = fuse_ipc_fill(fip, FUSE_RMDIR, dfnp->ino, ap->a_cred);
+
+       memcpy(p, ncp->nc_name, ncp->nc_nlen);
+       p[ncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error) {
+               vrele(vp);
+               return error;
+       }
+
+       /* Parent lock first, then the directory being removed. */
+       fnp = VTOI(vp);
+       mtx_lock(&dfnp->node_lock);
+       mtx_lock(&fnp->node_lock);
+       error = fuse_dent_find(dfnp, p, strlen(p), &fep);
+       if (error == ENOENT) {
+               mtx_unlock(&fnp->node_lock);
+               mtx_unlock(&dfnp->node_lock);
+               fuse_ipc_put(fip);
+               vrele(vp);
+               return error;
+       }
+       fuse_dent_detach(dfnp, fep);
+       fuse_dent_free(fep);
+       mtx_unlock(&fnp->node_lock);
+       mtx_unlock(&dfnp->node_lock);
+
+       cache_unlink(ap->a_nch);
+       fuse_knote(dvp, NOTE_WRITE | NOTE_LINK);
+
+       fuse_ipc_put(fip);
+       vrele(vp);
+
+       return 0;
+}
+
+/*
+ * VOP_PATHCONF for FUSE.
+ *
+ * Answers _PC_FILESIZEBITS (64) and _PC_NO_TRUNC (1) directly and defers
+ * every other name to vop_stdpathconf().
+ */
+static int
+fuse_vop_pathconf(struct vop_pathconf_args *ap)
+{
+       switch (ap->a_name) {
+       case _PC_FILESIZEBITS:
+               *ap->a_retval = 64;
+               return 0;
+       case _PC_NO_TRUNC:
+               *ap->a_retval = 1;
+               return 0;
+       default:
+               break;
+       }
+
+       return vop_stdpathconf(ap);
+}
+
+/*
+ * VOP_READDIR for FUSE.
+ *
+ * Sends a single FUSE_READDIR starting at offset 0 and walks the returned
+ * fuse_dirent records.  Records whose accumulated dirent offset lies
+ * before uio->uio_offset are skipped; the rest are written to the uio with
+ * vop_write_dirent() until it reports that it cannot continue or the reply
+ * is exhausted.  *ap->a_eofflag (when supplied) is set once fewer than
+ * FUSE_NAME_OFFSET bytes of reply remain.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_READDIR) is true, the error from fuse_ipc_tx(),
+ * EINVAL for a malformed record (zero name length, or a record extending
+ * past the end of the reply), the error reported by vop_write_dirent(),
+ * or 0.
+ */
+static int
+fuse_vop_readdir(struct vop_readdir_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_ipc *fip;
+       struct fuse_read_in *fri;
+       const char *buf;
+       size_t len;
+       off_t cur_offset = 0;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_READDIR))
+               return EOPNOTSUPP;
+
+       fip = fuse_ipc_get(fmp, sizeof(*fri));
+       fri = fuse_ipc_fill(fip, FUSE_READDIR, VTOI(vp)->ino, ap->a_cred);
+       /* Start from the node's handle; a file pointer's handle overrides it. */
+       fri->fh = fuse_nfh(VTOI(vp));
+       if (ap->a_fp)
+               fri->fh = fuse_fh(ap->a_fp);
+       fri->offset = 0;
+       /*
+        * XXX This needs to be large enough to read all entries at once.
+        * FUSE filesystems typically just opendir/readdir and return entries.
+        */
+       fri->size = FUSE_BLKSIZE * 10;
+       /* unused */
+       //fri->read_flags = ...;
+       //fri->lock_owner = ...;
+       //fri->flags = ...;
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       buf = fuse_out_data(fip);
+       len = fuse_out_data_size(fip);
+
+       while (1) {
+               const struct fuse_dirent *fde;
+               size_t freclen;
+
+               fuse_dbg("uio_offset=%ju uio_resid=%ju\n",
+                   uio->uio_offset, uio->uio_resid);
+
+               /* Not even a record header left: the reply is exhausted. */
+               if (len < FUSE_NAME_OFFSET) {
+                       if (ap->a_eofflag)
+                               *ap->a_eofflag = 1;
+                       break;
+               }
+               if (uio->uio_resid < FUSE_NAME_OFFSET)
+                       break;
+
+               fde = (const struct fuse_dirent*)buf;
+               if (!fde->namelen) {
+                       error = EINVAL;
+                       break;
+               }
+               freclen = FUSE_DIRENT_SIZE(fde);
+               /*
+                * The record length comes from the reply itself.  Reject a
+                * record that claims to extend past the data received,
+                * otherwise fde->name would be read out of bounds and the
+                * unsigned "len -= freclen" below would wrap around.
+                */
+               if (freclen > len) {
+                       error = EINVAL;
+                       break;
+               }
+
+               /*
+                * Also see
+                * getdirentries(2) in sys/kern/vfs_syscalls.c
+                * readdir(3) in lib/libc/gen/readdir.c
+                */
+               if (cur_offset >= uio->uio_offset) {
+                       error = 0;
+                       if (vop_write_dirent(&error, uio, fde->ino, fde->type,
+                           fde->namelen, fde->name))
+                               break;
+                       if (error)
+                               break;
+                       fuse_dbg("ino=%ju type=%d name=%s len=%u\n",
+                           fde->ino, fde->type, fde->name, fde->namelen);
+               }
+
+               cur_offset += _DIRENT_RECLEN(fde->namelen);
+               buf += freclen;
+               len -= freclen;
+       }
+       fuse_ipc_put(fip);
+
+       return error;
+}
+
+/*
+ * VOP_READLINK for FUSE.
+ *
+ * Sends FUSE_READLINK (no request payload) and copies the whole reply
+ * payload into ap->a_uio with uiomove().
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_READLINK) is true, EINVAL when the vnode is not a
+ * symlink, or the error from fuse_ipc_tx() / uiomove().
+ */
+static int
+fuse_vop_readlink(struct vop_readlink_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct fuse_ipc *fip;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_READLINK))
+               return EOPNOTSUPP;
+       if (vp->v_type != VLNK)
+               return EINVAL;
+
+       fip = fuse_ipc_get(fmp, 0);
+       fuse_ipc_fill(fip, FUSE_READLINK, VTOI(vp)->ino, ap->a_cred);
+
+       error = fuse_ipc_tx(fip);
+       if (error != 0)
+               return error;
+
+       error = uiomove(fuse_out_data(fip), fuse_out_data_size(fip),
+           ap->a_uio);
+       fuse_ipc_put(fip);
+
+       return error;
+}
+
+/*
+ * VOP_NRENAME for FUSE: rename the entry held by a_fnch in fdvp to the
+ * name held by a_tnch in tdvp.
+ *
+ * An existing target vnode is looked up with cache_vget().  After the
+ * cross-mount and directory/non-directory compatibility checks the
+ * request (fuse_rename_in followed by the old and new NUL-terminated
+ * names) is sent as FUSE_RENAME.  On success the source directory entry
+ * is detached from its parent, renamed if the name changed, any existing
+ * target entry is detached and freed, and the source entry is attached to
+ * the target parent.  The namecache is updated with cache_rename() and
+ * knotes are posted.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_RENAME) is true, EXDEV, ENOENT, ENOTEMPTY, ENOTDIR
+ * or EISDIR from the local checks, the error from fuse_ipc_tx(), or 0
+ * (including the case where source and target are the same vnode).
+ */
+static int
+fuse_vop_nrename(struct vop_nrename_args *ap)
+{
+       struct namecache *fncp = ap->a_fnch->ncp;
+       struct namecache *tncp = ap->a_tnch->ncp;
+       struct vnode *fdvp = ap->a_fdvp;
+       struct vnode *fvp = fncp->nc_vp;
+       struct vnode *tdvp = ap->a_tdvp;
+       /*
+        * Start from NULL: tvp is tested below and at "out" even when
+        * cache_vget() fails, so it must never be read uninitialized.
+        */
+       struct vnode *tvp = NULL;
+       struct fuse_mount *fmp = VFSTOFUSE(fdvp->v_mount);
+       struct fuse_node *fdfnp = VTOI(fdvp);
+       struct fuse_node *ffnp = VTOI(fvp);
+       struct fuse_node *tdfnp = VTOI(tdvp);
+       struct fuse_node *tfnp;
+       struct fuse_dent *ffep;
+       struct fuse_dent *tfep;
+       struct fuse_ipc *fip;
+       struct fuse_rename_in *fri;
+       char *p, *newname, *oldname;
+       int error;
+
+       KKASSERT(fdvp->v_mount == fvp->v_mount);
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_RENAME))
+               return EOPNOTSUPP;
+
+       /* A failing lookup simply means there is no target to replace. */
+       error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
+       if (!error) {
+               tfnp = VTOI(tvp);
+               vn_unlock(tvp);
+       } else
+               tfnp = NULL;
+
+       /* Disallow cross-device renames.
+        * Why isn't this done by the caller? */
+       if (fvp->v_mount != tdvp->v_mount ||
+           (tvp && fvp->v_mount != tvp->v_mount)) {
+               error = EXDEV;
+               goto out;
+       }
+
+       /* Renaming an entry onto itself is a no-op. */
+       if (fvp == tvp) {
+               error = 0;
+               goto out;
+       }
+       error = fuse_dent_find(fdfnp, fncp->nc_name, fncp->nc_nlen, &ffep);
+       if (error == ENOENT)
+               goto out;
+       KKASSERT(ffep->fnp == ffnp);
+
+       if (tvp) {
+               KKASSERT(tfnp);
+               if (ffnp->type == VDIR && tfnp->type == VDIR) {
+                       if (!RB_EMPTY(&tfnp->dent_head)) {
+                               error = ENOTEMPTY;
+                               goto out;
+                       }
+               } else if (ffnp->type == VDIR && tfnp->type != VDIR) {
+                       error = ENOTDIR;
+                       goto out;
+               } else if (ffnp->type != VDIR && tfnp->type == VDIR) {
+                       error = EISDIR;
+                       goto out;
+               } else
+                       KKASSERT(ffnp->type != VDIR && tfnp->type != VDIR);
+       }
+
+       fip = fuse_ipc_get(fmp,
+           sizeof(*fri) + fncp->nc_nlen + tncp->nc_nlen + 2);
+       /* There is also fuse_rename2_in with flags. */
+       fri = fuse_ipc_fill(fip, FUSE_RENAME, fdfnp->ino, ap->a_cred);
+       fri->newdir = tdfnp->ino;
+
+       /* Payload: old name, NUL, new name, NUL. */
+       p = (char*)(fri + 1);
+       memcpy(p, fncp->nc_name, fncp->nc_nlen);
+       p[fncp->nc_nlen] = '\0';
+       memcpy(p + fncp->nc_nlen + 1, tncp->nc_name, tncp->nc_nlen);
+       p[fncp->nc_nlen + 1 + tncp->nc_nlen] = '\0';
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               goto out;
+       fuse_ipc_put(fip);
+
+       /* Allocate the replacement name before taking any node lock. */
+       if (fncp->nc_nlen != tncp->nc_nlen ||
+           memcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen)) {
+               newname = kmalloc(tncp->nc_nlen + 1, M_TEMP, M_WAITOK | M_ZERO);
+               KKASSERT(newname);
+               memcpy(newname, tncp->nc_name, tncp->nc_nlen);
+               newname[tncp->nc_nlen] = '\0';
+               fuse_dbg("newname=\"%s\"\n", newname);
+       } else
+               newname = NULL;
+
+       mtx_lock(&tdfnp->node_lock);
+       mtx_lock(&fdfnp->node_lock);
+       mtx_lock(&ffnp->node_lock);
+
+       fuse_dbg("detach from_dent=\"%s\"\n", ffep->name);
+       fuse_dent_detach(fdfnp, ffep);
+
+       /* Swap the names; the old one is freed after the locks are dropped. */
+       if (newname) {
+               oldname = ffep->name;
+               ffep->name = newname;
+               newname = oldname;
+       }
+
+       if (tvp) {
+               mtx_lock(&tfnp->node_lock);
+               error = fuse_dent_find(tdfnp, tncp->nc_name, tncp->nc_nlen,
+                   &tfep);
+               KKASSERT(!error);
+               fuse_dbg("detach/free to_dent=\"%s\"\n", tfep->name);
+               fuse_dent_detach(tdfnp, tfep);
+               fuse_dent_free(tfep);
+               mtx_unlock(&tfnp->node_lock);
+               fuse_knote(tdvp, NOTE_DELETE);
+       }
+
+       fuse_dbg("attach from_dent=\"%s\"\n", ffep->name);
+       fuse_dent_attach(tdfnp, ffep);
+
+       mtx_unlock(&ffnp->node_lock);
+       mtx_unlock(&fdfnp->node_lock);
+       mtx_unlock(&tdfnp->node_lock);
+
+       if (newname)
+               kfree(newname, M_TEMP);
+
+       cache_rename(ap->a_fnch, ap->a_tnch);
+       fuse_knote(fdvp, NOTE_WRITE);
+       fuse_knote(tdvp, NOTE_WRITE);
+       fuse_knote(fvp, NOTE_RENAME);
+out:
+       if (tvp)
+               vrele(tvp);
+
+       return error;
+}
+
+/*
+ * VOP_NSYMLINK for FUSE: create a symbolic link in directory dvp.
+ *
+ * Sends FUSE_SYMLINK whose payload is the NUL-terminated link name
+ * followed by the NUL-terminated target.  The reply must describe a
+ * symlink; a VLNK node/vnode is then obtained from fuse_alloc_node(), its
+ * attributes are set from the reply, the namecache entry is resolved to
+ * it and it is handed back through *ap->a_vpp.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_SYMLINK) is true, EINVAL when the reply is not a
+ * symlink, the error from fuse_ipc_tx() or fuse_alloc_node(), or 0.
+ */
+static int
+fuse_vop_nsymlink(struct vop_nsymlink_args *ap)
+{
+       struct vnode *dvp = ap->a_dvp;
+       struct vnode *vp;
+       struct namecache *ncp = ap->a_nch->ncp;
+       struct fuse_mount *fmp = VFSTOFUSE(dvp->v_mount);
+       struct fuse_node *dfnp = VTOI(dvp);
+       struct fuse_node *fnp;
+       struct fuse_ipc *fip;
+       struct fuse_entry_out *feo;
+       size_t targetsz;
+       char *p;
+       int error;
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+
+       if (fuse_test_nosys(fmp, FUSE_SYMLINK))
+               return EOPNOTSUPP;
+
+       /* Target length including its terminating NUL, computed once. */
+       targetsz = strlen(ap->a_target) + 1;
+
+       fip = fuse_ipc_get(fmp, targetsz + ncp->nc_nlen + 1);
+       p = fuse_ipc_fill(fip, FUSE_SYMLINK, dfnp->ino, ap->a_cred);
+
+       memcpy(p, ncp->nc_name, ncp->nc_nlen);
+       p[ncp->nc_nlen] = '\0';
+       memcpy(p + ncp->nc_nlen + 1, ap->a_target, targetsz);
+
+       error = fuse_ipc_tx(fip);
+       if (error)
+               return error;
+
+       feo = fuse_out_data(fip);
+       if (IFTOVT(feo->attr.mode) != VLNK) {
+               fuse_ipc_put(fip);
+               return EINVAL;
+       }
+
+       error = fuse_alloc_node(dfnp, feo->nodeid, p, strlen(p), VLNK, &vp);
+       if (error) {
+               fuse_ipc_put(fip);
+               return error;
+       }
+       KKASSERT(vp);
+       KKASSERT(vn_islocked(vp));
+
+       fnp = VTOI(vp);
+       mtx_lock(&fnp->node_lock);
+       fuse_set_attr(fnp, &feo->attr);
+       mtx_unlock(&fnp->node_lock);
+
+       cache_setunresolved(ap->a_nch);
+       cache_setvp(ap->a_nch, vp);
+       *(ap->a_vpp) = vp;
+       /*
+        * The directory is what changed, so notify watchers of dvp, as the
+        * other entry-creating operations in this file do.
+        */
+       fuse_knote(dvp, NOTE_WRITE);
+
+       /* unused */
+       //feo->generation;
+       //feo->entry_valid;
+       //feo->attr_valid;
+       //feo->entry_valid_nsec;
+       //feo->attr_valid_nsec;
+
+       fuse_ipc_put(fip);
+
+       return 0;
+}
+
+/*
+ * VOP_READ for FUSE.
+ *
+ * Returns ENOTCONN when the mount is dead, EOPNOTSUPP when
+ * fuse_test_nosys(FUSE_READ) is true and 0 for an empty request; every
+ * other request is handed to fuse_read().
+ */
+static int
+fuse_vop_read(struct vop_read_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct uio *uio = ap->a_uio;
+
+       fuse_dbg("ino=%ju ioflag=%x\n", VTOI(vp)->ino, ap->a_ioflag);
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_READ))
+               return EOPNOTSUPP;
+       if (uio->uio_resid == 0)
+               return 0;
+
+       return fuse_read(ap);
+}
+
+/*
+ * VOP_WRITE for FUSE.
+ *
+ * Writing is currently disabled: the function logs the request and
+ * returns EOPNOTSUPP unconditionally.  The code after that return is
+ * unreachable and is kept as the intended implementation (direct I/O
+ * goes to fuse_dio_write(), everything else to fuse_write()).
+ */
+static int
+fuse_vop_write(struct vop_write_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_mount *fmp = VFSTOFUSE(vp->v_mount);
+       struct uio *uio = ap->a_uio;
+
+       fuse_dbg("ino=%ju ioflag=%x\n", VTOI(vp)->ino, ap->a_ioflag);
+       return EOPNOTSUPP; /* XXX disabled */
+
+       if (fuse_test_dead(fmp))
+               return ENOTCONN;
+       if (fuse_test_nosys(fmp, FUSE_WRITE))
+               return EOPNOTSUPP;
+       if (uio->uio_resid == 0)
+               return 0;
+
+       if (ap->a_ioflag & IO_DIRECT)
+               return fuse_dio_write(ap);
+
+       return fuse_write(ap);
+}
+
+/*
+ * VOP_STRATEGY for FUSE.
+ *
+ * Performs no transfer: the buffer is marked as fully processed without
+ * error (b_resid = 0, b_error = 0) and the bio is completed immediately.
+ * Always returns 0.
+ */
+static int
+fuse_vop_strategy(struct vop_strategy_args *ap)
+{
+       struct bio *bio = ap->a_bio;
+       struct buf *bp = bio->bio_buf;
+
+       fuse_dbg("ino=%ju b_cmd=%d\n", VTOI(ap->a_vp)->ino, bp->b_cmd);
+
+       bp->b_error = 0;
+       bp->b_resid = 0;
+       biodone(bio);
+
+       return 0;
+}
+
+/*
+ * VOP_BMAP for FUSE: not supported.  Logs the request and always returns
+ * EOPNOTSUPP.
+ */
+static int
+fuse_bmap(struct vop_bmap_args *ap)
+{
+       fuse_dbg("ino=%ju a_cmd=%d a_loffset=%ju\n",
+           VTOI(ap->a_vp)->ino, ap->a_cmd, ap->a_loffset);
+
+       return EOPNOTSUPP;
+}
+
+/*
+ * VOP_PRINT for FUSE: prints the node pointer, its inode number and the
+ * inode number of the parent node's vnode.  Always returns 0.
+ */
+static int
+fuse_vop_print(struct vop_print_args *ap)
+{
+       struct fuse_node *fnp = VTOI(ap->a_vp);
+       struct fuse_node *parent = VTOI(fnp->pfnp->vp);
+
+       fuse_print("tag VT_FUSE, node %p, ino %ju, parent ino %ju\n",
+           fnp, fnp->ino, parent->ino);
+
+       return 0;
+}
+
+/*
+ * VOP_INACTIVE for FUSE.
+ *
+ * Under the mount token: a vnode without a fuse_node is recycled with
+ * vrecycle(); otherwise its buffers are invalidated with
+ * vinvalbuf(V_SAVE).  Always returns 0.
+ */
+static int
+fuse_vop_inactive(struct vop_inactive_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct mount *mp = vp->v_mount;
+       struct fuse_node *fnp = VTOI(vp);
+
+       lwkt_gettoken(&mp->mnt_token);
+       if (fnp != NULL) {
+               fuse_dbg("ino=%ju nlink=%d\n", fnp->ino, fnp->nlink);
+               vinvalbuf(vp, V_SAVE, 0, 0);
+       } else {
+               vrecycle(vp);
+       }
+       lwkt_reltoken(&mp->mnt_token);
+
+       return 0;
+}
+
+/*
+ * VOP_RECLAIM entry point.
+ *
+ * Under the mount token, frees the fuse node attached to the vnode (if
+ * there is one) and clears the vnode's dirty state. Always returns 0.
+ */
+static int
+fuse_vop_reclaim(struct vop_reclaim_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_node *fnp = VTOI(vp);
+       struct mount *mp = vp->v_mount;
+
+       lwkt_gettoken(&mp->mnt_token);
+
+       if (fnp != NULL) {
+               fuse_dbg("ino=%ju\n", fnp->ino);
+               fuse_node_free(fnp);
+               vclrisdirty(vp);
+       }
+
+       lwkt_reltoken(&mp->mnt_token);
+
+       return 0;
+}
+
+/*
+ * VOP_MOUNTCTL entry point.
+ *
+ * No operation is handled specially yet (a MOUNTCTL_MOUNTFLAGS case is
+ * still to be written), so every request is passed to vop_stdmountctl()
+ * while holding the mount token. Returns whatever vop_stdmountctl()
+ * returns.
+ */
+static int
+fuse_vop_mountctl(struct vop_mountctl_args *ap)
+{
+       struct mount *mp = ap->a_head.a_ops->head.vv_mount;
+       int error;
+
+       lwkt_gettoken(&mp->mnt_token);
+       error = vop_stdmountctl(ap);
+       lwkt_reltoken(&mp->mnt_token);
+
+       return error;
+}
+
+/* kqueue filter callbacks; the definitions follow fuse_kqfilter(). */
+static void filt_fusedetach(struct knote *kn);
+static int filt_fuseread(struct knote *kn, long hint);
+static int filt_fusewrite(struct knote *kn, long hint);
+static int filt_fusevnode(struct knote *kn, long hint);
+
+/*
+ * Filter tables installed by fuse_kqfilter(), one per supported filter.
+ * The initializers are positional: flags, a NULL slot (presumably the
+ * attach hook -- confirm against struct filterops), the shared detach
+ * routine, and the filter-specific event routine.
+ */
+static struct filterops fuseread_filtops = {
+       FILTEROP_ISFD | FILTEROP_MPSAFE,
+       NULL,
+       filt_fusedetach,
+       filt_fuseread
+};
+static struct filterops fusewrite_filtops = {
+       FILTEROP_ISFD | FILTEROP_MPSAFE,
+       NULL,
+       filt_fusedetach,
+       filt_fusewrite
+};
+static struct filterops fusevnode_filtops = {
+       FILTEROP_ISFD | FILTEROP_MPSAFE,
+       NULL,
+       filt_fusedetach,
+       filt_fusevnode
+};
+
+/*
+ * VOP_KQFILTER entry point.
+ *
+ * Picks the filter table matching kn->kn_filter (read, write or vnode),
+ * records the vnode in kn_hook and links the knote onto the vnode's note
+ * list. Any other filter type is rejected with EOPNOTSUPP and the knote
+ * is left untouched.
+ */
+static int
+fuse_kqfilter(struct vop_kqfilter_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct knote *kn = ap->a_kn;
+       struct filterops *fops;
+
+       if (kn->kn_filter == EVFILT_READ)
+               fops = &fuseread_filtops;
+       else if (kn->kn_filter == EVFILT_WRITE)
+               fops = &fusewrite_filtops;
+       else if (kn->kn_filter == EVFILT_VNODE)
+               fops = &fusevnode_filtops;
+       else
+               return EOPNOTSUPP;
+
+       kn->kn_fop = fops;
+       kn->kn_hook = (caddr_t)vp;
+       knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
+
+       return 0;
+}
+
+/*
+ * Detach routine shared by all FUSE filters: unlinks the knote from the
+ * note list of the vnode stored in kn_hook.
+ */
+static void
+filt_fusedetach(struct knote *kn)
+{
+       struct vnode *vp = (struct vnode *)kn->kn_hook;
+
+       knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
+}
+
+/*
+ * EVFILT_READ event routine.
+ *
+ * On NOTE_REVOKE the knote is flagged EV_EOF | EV_NODATA | EV_ONESHOT and
+ * the event fires. Otherwise kn_data is set to the distance between the
+ * node size and the file's current offset, capped at INTPTR_MAX; the
+ * event fires unconditionally for NOTE_OLDAPI knotes and otherwise only
+ * when kn_data is non-zero.
+ *
+ * The former "if kn_data is zero, compute it again" step recalculated the
+ * very same value from the same local and could never change the result,
+ * so it has been dropped.
+ */
+static int
+filt_fuseread(struct knote *kn, long hint)
+{
+       struct vnode *vp = (struct vnode *)kn->kn_hook;
+       struct fuse_node *fnp = VTOI(vp);
+       off_t off;
+       int ready;
+
+       if (hint == NOTE_REVOKE) {
+               kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
+               return 1;
+       }
+
+       /*
+        * Interlock against MP races when performing this function.
+        */
+       mtx_lock(&fnp->node_lock);
+       off = fnp->size - kn->kn_fp->f_offset;
+       kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
+       if (kn->kn_sfflags & NOTE_OLDAPI)
+               ready = 1;
+       else
+               ready = (kn->kn_data != 0);
+       mtx_unlock(&fnp->node_lock);
+
+       return ready;
+}
+
+/*
+ * EVFILT_WRITE event routine.
+ *
+ * Always fires with kn_data set to 0. On NOTE_REVOKE the knote is also
+ * flagged EV_EOF | EV_NODATA | EV_ONESHOT.
+ */
+static int
+filt_fusewrite(struct knote *kn, long hint)
+{
+       kn->kn_data = 0;
+
+       if (hint == NOTE_REVOKE)
+               kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
+
+       return 1;
+}
+
+/*
+ * EVFILT_VNODE event routine.
+ *
+ * If any bit of the hint is among the requested kn_sfflags, the whole
+ * hint is OR-ed into kn_fflags. NOTE_REVOKE additionally flags the knote
+ * EV_EOF | EV_NODATA and always fires; any other hint fires only when
+ * kn_fflags is non-zero.
+ */
+static int
+filt_fusevnode(struct knote *kn, long hint)
+{
+       if (kn->kn_sfflags & hint)
+               kn->kn_fflags |= hint;
+
+       if (hint != NOTE_REVOKE)
+               return kn->kn_fflags != 0;
+
+       kn->kn_flags |= (EV_EOF | EV_NODATA);
+       return 1;
+}
+
+/*
+ * Vnode operations table for FUSE vnodes. Anything not listed goes to
+ * vop_defaultop. vop_nlookupdotdot and vop_advlock are not wired up.
+ */
+struct vop_ops fuse_vnode_vops = {
+       .vop_default =          vop_defaultop,
+
+       /* open/close, attributes, sync */
+       .vop_open =             fuse_vop_open,
+       .vop_close =            fuse_vop_close,
+       .vop_access =           fuse_vop_access,
+       .vop_getattr =          fuse_vop_getattr,
+       .vop_setattr =          fuse_vop_setattr,
+       .vop_pathconf =         fuse_vop_pathconf,
+       .vop_fsync =            fuse_vop_fsync,
+
+       /* name and directory operations */
+       .vop_nresolve =         fuse_vop_nresolve,
+       .vop_ncreate =          fuse_vop_ncreate,
+       .vop_nmknod =           fuse_vop_nmknod,
+       .vop_nlink =            fuse_vop_nlink,
+       .vop_nsymlink =         fuse_vop_nsymlink,
+       .vop_nremove =          fuse_vop_nremove,
+       .vop_nrename =          fuse_vop_nrename,
+       .vop_nmkdir =           fuse_vop_nmkdir,
+       .vop_nrmdir =           fuse_vop_nrmdir,
+       .vop_readdir =          fuse_vop_readdir,
+       .vop_readlink =         fuse_vop_readlink,
+
+       /* data I/O */
+       .vop_read =             fuse_vop_read,
+       .vop_write =            fuse_vop_write,
+       .vop_strategy =         fuse_vop_strategy,
+       .vop_bmap =             fuse_bmap,
+       .vop_getpages =         vop_stdgetpages,
+       .vop_putpages =         vop_stdputpages,
+
+       /* vnode lifecycle and miscellaneous */
+       .vop_inactive =         fuse_vop_inactive,
+       .vop_reclaim =          fuse_vop_reclaim,
+       .vop_print =            fuse_vop_print,
+       .vop_mountctl =         fuse_vop_mountctl,
+       .vop_kqfilter =         fuse_kqfilter,
+};
+
+/*
+ * Second, reduced operations table ("spec"). Read and write are routed
+ * to vop_stdnoread / vop_stdnowrite, and anything not listed goes to
+ * vop_defaultop. vop_markatime is not wired up.
+ */
+struct vop_ops fuse_spec_vops = {
+       .vop_default =          vop_defaultop,
+
+       /* close, attributes, sync */
+       .vop_close =            fuse_vop_close,
+       .vop_access =           fuse_vop_access,
+       .vop_getattr =          fuse_vop_getattr,
+       .vop_setattr =          fuse_vop_setattr,
+       .vop_fsync =            fuse_vop_fsync,
+
+       /* data I/O is refused */
+       .vop_read =             vop_stdnoread,
+       .vop_write =            vop_stdnowrite,
+
+       /* vnode lifecycle and miscellaneous */
+       .vop_inactive =         fuse_vop_inactive,
+       .vop_reclaim =          fuse_vop_reclaim,
+       .vop_print =            fuse_vop_print,
+};