From: Matthew Dillon Date: Sat, 1 Aug 2009 16:43:36 +0000 (-0700) Subject: DEVFS - Bring Alex's GSOC DEVFS core into the main tree X-Git-Tag: v2.4.0~382 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/21864bc579750a64b812ace90eb937fa5d8b31e5 DEVFS - Bring Alex's GSOC DEVFS core into the main tree This is a rollup commit bringing in Alex Hornung's GSOC DEVFS project core in. This is the VFS portion of the project. Author: Alex Hornung Submitted-by: Alex Hornung --- diff --git a/sys/vfs/devfs/Makefile b/sys/vfs/devfs/Makefile new file mode 100644 index 0000000000..d3d062b4b7 --- /dev/null +++ b/sys/vfs/devfs/Makefile @@ -0,0 +1,4 @@ +KMOD= devfs +SRCS= devfs_core.c devfs_vfsops.c devfs_vnops.c + +.include diff --git a/sys/vfs/devfs/devfs.h b/sys/vfs/devfs/devfs.h new file mode 100644 index 0000000000..9199227868 --- /dev/null +++ b/sys/vfs/devfs/devfs.h @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef _VFS_DEVFS_H_ +#define _VFS_DEVFS_H_ + +#if !defined(_KERNEL) && !defined(_KERNEL_STRUCTURES) +#error "This file should not be included by userland programs." +#endif + +#ifndef _SYS_QUEUE_H_ +#include +#endif +#ifndef _SYS_LOCK_H_ +#include +#endif +#ifndef _SYS_CONF_H_ +#include +#endif +#ifndef _SYS_MSGPORT2_H_ +//#include +#endif +#ifndef _SYS_MSGPORT_H_ +#include +#endif +#ifndef _SYS_DIRENT_H_ +#include +#endif +#ifndef _SYS_DEVICE_H_ +#include +#endif +#ifndef _SYS_UCRED_H_ +#include +#endif + + +typedef enum { + Proot, /* the filesystem root */ + Plink, + Preg, + Pdir, + Pdev +} devfs_nodetype; + +/* +struct devfs_dev { + cdev_t d_devt; + uint16_t d_flags; + uint16_t d_class; + + //XXX: do I need a use count so I know when it is safe to + // unload the module or detach or whatever? + TAILQ_ENTRY(devfs_dev) link; +}; +*/ + +struct devfs_node { + cdev_t d_dev; /* device assoicated with this node */ + + struct mount *mp; /* mount point of this node */ + struct dirent d_dir; /* dirent data (name, inode, ...) 
*/ + struct vnode *v_node; /* assoicated vnode */ + struct devfs_node *parent; /* parent of this node */ + devfs_nodetype node_type; /* devfs node type */ + + u_int64_t refs; /* number of open references */ + size_t nchildren; /* number of children of a parent */ + u_int64_t cookie_jar; /* cookie pool for children */ + u_int64_t cookie; /* directory entry cookie for readdir */ + + struct devfs_node *link_target; /* target of this autolink-type node */ + size_t nlinks; /* number of links that point to this node */ + + char *symlink_name; /* symlink name for readlink */ + size_t symlink_namelen; /* symlink name length for readlink */ + + u_short mode; /* files access mode and type */ + uid_t uid; /* owner user id */ + gid_t gid; /* owner group id */ + u_long flags; + + struct timespec atime; /* time of last access */ + struct timespec mtime; /* time of last modification */ + struct timespec ctime; /* time file changed */ + + + + /* Other members */ + TAILQ_ENTRY(devfs_node) link; + TAILQ_HEAD(, devfs_node) list; /* linked list of children */ +}; + +struct devfs_orphan { + struct devfs_node *node; + + TAILQ_ENTRY(devfs_orphan) link; +}; + +struct devfs_mnt_data { + TAILQ_HEAD(, devfs_orphan) orphan_list; + + struct devfs_node *root_node; + struct mount *mp; + + uint32_t mnt_type; + size_t leak_count; + + int jailed; + size_t mntonnamelen; + TAILQ_ENTRY(devfs_mnt_data) link; +}; + +struct devfs_clone_handler { + char name[128]; + u_char namlen; + d_clone_t *nhandler; + + TAILQ_ENTRY(devfs_clone_handler) link; +}; + + +struct devfs_alias { + char name[PATH_MAX + 1]; + cdev_t dev_target; + + TAILQ_ENTRY(devfs_alias) link; +}; + + +typedef struct devfs_msg { + struct lwkt_msg hdr; + __uint32_t id; + + union { + struct { + cdev_t dev; + uid_t uid; + gid_t gid; + int perms; + } __m_dev; + struct { + struct devfs_mnt_data *mnt; + } __m_mnt; + struct { + char *name; + d_clone_t *nhandler; + } __m_chandler; + struct { + void *load; + } __m_gen; + struct { + void *resp; + } 
__m_resp; + struct { + udev_t udev; + } __m_udev; + struct { + cdev_t cdev; + } __m_cdev; + struct { + char *name; + } __m_name; + struct { + char *basename; + u_char unit; + struct vnode *vp; + } __m_clone; + struct { + struct devfs_node *node; + } __m_node; + struct { + char *name; + char *target; + struct mount *mp; + } __m_link; + struct { + struct dev_ops *ops; + int minor; + } __m_ops; + } __m_u; + +#define m_chandler __m_u.__m_chandler +#define m_mnt __m_u.__m_mnt.mnt +#define m_load __m_u.__m_gen.load +#define m_response __m_u.__m_resp.resp +#define m_dev __m_u.__m_dev +#define m_link __m_u.__m_link +#define m_udev __m_u.__m_udev.udev +#define m_cdev __m_u.__m_cdev.cdev +#define m_name __m_u.__m_name.name +#define m_clone __m_u.__m_clone +#define m_node __m_u.__m_node.node +#define m_ops __m_u.__m_ops + +} *devfs_msg_t; + +typedef struct devfs_core_args { + thread_t td; +} *devfs_core_args_t; + + +TAILQ_HEAD(devfs_node_head, devfs_node); +TAILQ_HEAD(devfs_dev_head, cdev); +TAILQ_HEAD(devfs_mnt_head, devfs_mnt_data); +TAILQ_HEAD(devfs_chandler_head, devfs_clone_handler); +TAILQ_HEAD(devfs_alias_head, devfs_alias); + +typedef void (devfs_scan_t)(cdev_t); + + +#define DEVFS_NODE(x) ((struct devfs_node *)((x)->v_data)) +#define DEVFS_MNTDATA(x) ((struct devfs_mnt_data *)((x)->mnt_data)) +#define DEVFS_ORPHANLIST(x) (&(DEVFS_MNTDATA(x)->orphan_list)) +#define DEVFS_DENODE_HEAD(x) (&((x)->list)) +#define DEVFS_ISDIGIT(x) ((x >= '0') && (x <= '9')) + +#define DEVFS_DEFAULT_MODE ((VREAD|VWRITE|VEXEC) | ((VREAD|VEXEC)>>3) | ((VREAD|VEXEC)>>6)); /* -rwxr-xr-x */ +#define DEVFS_DEFAULT_UID 0 /* root */ +#define DEVFS_DEFAULT_GID 5 /* operator */ + +//#define DEVFS_DEFAULT_FLAGS 0 + +/* + * debug levels + */ +#define DEVFS_DEBUG_SHOW 0x00 +#define DEVFS_DEBUG_WARNING 0x01 +#define DEVFS_DEBUG_INFO 0x02 +#define DEVFS_DEBUG_DEBUG 0x03 + +/* + * Message ids + */ +#define DEVFS_TERMINATE_CORE 0x01 +#define DEVFS_DEVICE_CREATE 0x02 +#define DEVFS_DEVICE_DESTROY 0x03 
+#define DEVFS_MOUNT_ADD 0x04 +#define DEVFS_MOUNT_DEL 0x05 +#define DEVFS_CREATE_ALL_DEV 0x06 +#define DEVFS_DESTROY_SUBNAMES 0x07 +#define DEVFS_DESTROY_DEV_BY_OPS 0x08 +#define DEVFS_CHANDLER_ADD 0x09 +#define DEVFS_CHANDLER_DEL 0x0A +#define DEVFS_FIND_DEVICE_BY_UDEV 0x0B +#define DEVFS_FIND_DEVICE_BY_NAME 0x0C +#define DEVFS_MAKE_ALIAS 0x0D +#define DEVFS_APPLY_RULES 0x0F +#define DEVFS_RESET_RULES 0x10 +#define DEVFS_SCAN_CALLBACK 0x11 +#define DEVFS_SYNC 0x99 + +/* + * Node flags + */ +#define DEVFS_NODE_LINKED 0x01 /* Node is linked into topology */ +#define DEVFS_USER_CREATED 0x02 /* Node was user-created */ +#define DEVFS_NO_TRACE 0x04 /* Don't trace orphanage */ +#define DEVFS_CLONED 0x08 /* Node was created by the clone code */ +#define DEVFS_HIDDEN 0x10 /* Makes node inaccessible, apart from already allocated vnodes*/ +#define DEVFS_INVISIBLE 0x20 /* Makes node invisible in a readdir() */ +#define DEVFS_PTY 0x40 /* Node is linked to a PTY device */ +//#define DEVFS_LINK 0x20 + + +/* + * Clone helper stuff + */ +#define DEVFS_UNIT_HSIZE 64 /* power of 2 */ +#define DEVFS_UNIT_HMASK (DEVFS_UNIT_HSIZE - 1) +#define DEVFS_CLONE_HASHLIST(name) devfs_ ## name ## _clone_hashlist +#define DEVFS_DECLARE_CLONE_HASHLIST(name) struct devfs_unit_hash* DEVFS_CLONE_HASHLIST(name) [DEVFS_UNIT_HSIZE] + + +#define DEVFS_BITMAP_INITIAL_SIZE 1 +#define DEVFS_CLONE_BITMAP(name) devfs_ ## name ## _clone_bitmap +#define DEVFS_DECLARE_CLONE_BITMAP(name) struct devfs_bitmap DEVFS_CLONE_BITMAP(name) +#define devfs_clone_bitmap_put devfs_clone_bitmap_rst + +struct devfs_bitmap { + int chunks; + unsigned long *bitmap; +}; + +struct devfs_unit_hash { + struct devfs_unit_hash *next; + int unit_no; + + cdev_t dev; +}; + +struct devfs_clone_helper { + DEVFS_DECLARE_CLONE_HASHLIST(generic); + DEVFS_DECLARE_CLONE_BITMAP(generic); +}; + +#define DEVFS_CLONE_HELPER(name) devfs_ ## name ## _clone_helper +#define DEVFS_DECLARE_CLONE_HELPER(name) static struct devfs_clone_helper 
DEVFS_CLONE_HELPER(name) + + +void devfs_clone_bitmap_init(struct devfs_bitmap *); +void devfs_clone_bitmap_uninit(struct devfs_bitmap *); +void devfs_clone_bitmap_resize(struct devfs_bitmap *, int); +int devfs_clone_bitmap_fff(struct devfs_bitmap *); +void devfs_clone_bitmap_set(struct devfs_bitmap *, int); +void devfs_clone_bitmap_rst(struct devfs_bitmap *, int); +int devfs_clone_bitmap_get(struct devfs_bitmap *, int); +int devfs_clone_bitmap_chk(struct devfs_bitmap *, int); + +void devfs_clone_helper_init(struct devfs_clone_helper *); +void devfs_clone_helper_uninit(struct devfs_clone_helper *); +int devfs_clone_helper_insert(struct devfs_clone_helper *, cdev_t); +int devfs_clone_helper_remove(struct devfs_clone_helper *, int); + + +/* + * Prototypes + */ +int devfs_debug(int level, char *fmt, ...); +int devfs_allocv(struct vnode **, struct devfs_node *); +struct devfs_node *devfs_allocp(devfs_nodetype, char *, struct devfs_node *, struct mount *, cdev_t); +int devfs_allocvp(struct mount *, struct vnode **, devfs_nodetype, char *, struct devfs_node *, cdev_t); + +int devfs_freep(struct devfs_node *); +int devfs_reaperp(struct devfs_node *); + +int devfs_unlinkp(struct devfs_node *); + +void devfs_tracer_add_orphan(struct devfs_node *); +void devfs_tracer_del_orphan(struct devfs_node *); +size_t devfs_tracer_orphan_count(struct mount *, int); + +int devfs_set_perms(struct devfs_node *, uid_t, gid_t, u_short, u_long); +int devfs_gc(struct devfs_node *); + +int devfs_create_dev(cdev_t, uid_t, gid_t, int); +int devfs_destroy_dev(cdev_t); + +devfs_msg_t devfs_msg_send_sync(uint32_t, devfs_msg_t); +__uint32_t devfs_msg_send(uint32_t, devfs_msg_t); +__uint32_t devfs_msg_send_dev(uint32_t, cdev_t dev, uid_t, gid_t, int); +__uint32_t devfs_msg_send_mount(uint32_t, struct devfs_mnt_data *); +__uint32_t devfs_msg_send_ops(uint32_t, struct dev_ops *, int); +__uint32_t devfs_msg_send_chandler(uint32_t, char *, d_clone_t); +__uint32_t devfs_msg_send_generic(uint32_t, void *); 
+__uint32_t devfs_msg_send_name(uint32_t, char *); +__uint32_t devfs_msg_send_link(uint32_t, char *, char *, struct mount *); + +devfs_msg_t devfs_msg_get(void); +int devfs_msg_put(devfs_msg_t); + +int devfs_mount_add(struct devfs_mnt_data *); +int devfs_mount_del(struct devfs_mnt_data *); + +int devfs_create_all_dev(struct devfs_node *); + +struct devfs_node *devfs_resolve_or_create_path(struct devfs_node *, char *, int); +int devfs_resolve_name_path(char *, char *, char **, char **); +struct devfs_node *devfs_create_device_node(struct devfs_node *, cdev_t, char *, char *, ...); + +int devfs_destroy_device_node(struct devfs_node *, cdev_t); +int devfs_destroy_subnames(char *); +int devfs_destroy_dev_by_ops(struct dev_ops *, int); +struct devfs_node *devfs_find_device_node(struct devfs_node *, cdev_t); +struct devfs_node *devfs_find_device_node_by_name(struct devfs_node *, char *); + +cdev_t devfs_new_cdev(struct dev_ops *, int); +int devfs_destroy_cdev(cdev_t); + +cdev_t devfs_find_device_by_name(const char *, ...); +cdev_t devfs_find_device_by_udev(udev_t); + +int devfs_clone_handler_add(char *, d_clone_t *); +int devfs_clone_handler_del(char *); +int devfs_clone(char *, size_t *, cdev_t *, int, struct ucred *); + +int devfs_link_dev(cdev_t); +int devfs_unlink_dev(cdev_t); + +int devfs_make_alias(char *, cdev_t); + +int devfs_alias_create(char *name_orig, struct devfs_node *target); + +int devfs_apply_rules(char *); +int devfs_reset_rules(char *); + +int devfs_scan_callback(devfs_scan_t *); +int devfs_node_to_path(struct devfs_node *, char *); + +void devfs_config(void *); +#endif /* _VFS_DEVFS_H_ */ diff --git a/sys/vfs/devfs/devfs_core.c b/sys/vfs/devfs/devfs_core.c new file mode 100644 index 0000000000..2a03517ed5 --- /dev/null +++ b/sys/vfs/devfs/devfs_core.c @@ -0,0 +1,2193 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_DEVFS, "devfs", "Device File System (devfs) allocations"); + +/* + * SYSREF Integration - reference counting, allocation, + * sysid and syslink integration. 
+ */ +static void devfs_cdev_terminate(cdev_t dev); +static struct sysref_class cdev_sysref_class = { + .name = "cdev", + .mtype = M_DEVFS, + .proto = SYSREF_PROTO_DEV, + .offset = offsetof(struct cdev, si_sysref), + .objsize = sizeof(struct cdev), + .mag_capacity = 32, + .flags = 0, + .ops = { + .terminate = (sysref_terminate_func_t)devfs_cdev_terminate + } +}; + +static struct objcache *devfs_node_cache; +static struct objcache *devfs_msg_cache; +static struct objcache *devfs_dev_cache; + +static struct objcache_malloc_args devfs_node_malloc_args = { + sizeof(struct devfs_node), M_DEVFS }; +struct objcache_malloc_args devfs_msg_malloc_args = { + sizeof(struct devfs_msg), M_DEVFS }; +struct objcache_malloc_args devfs_dev_malloc_args = { + sizeof(struct cdev), M_DEVFS }; + +static struct devfs_dev_head devfs_dev_list = TAILQ_HEAD_INITIALIZER(devfs_dev_list); +static struct devfs_mnt_head devfs_mnt_list = TAILQ_HEAD_INITIALIZER(devfs_mnt_list); +static struct devfs_chandler_head devfs_chandler_list = TAILQ_HEAD_INITIALIZER(devfs_chandler_list); +static struct devfs_alias_head devfs_alias_list = TAILQ_HEAD_INITIALIZER(devfs_alias_list); + +struct lock devfs_lock; +static struct lwkt_port devfs_dispose_port; +static struct lwkt_port devfs_msg_port; +static struct thread *td_core; +//static void *devfs_id = (void *)0xDE33A; + +static ino_t d_ino = 0; +static __uint32_t msg_id = 0; +static struct spinlock ino_lock; +static int devfs_debug_enable = 0; + +static ino_t devfs_fetch_ino(void); +static int devfs_gc_dirs(struct devfs_node *); +static int devfs_gc_links(struct devfs_node *, struct devfs_node *, size_t); +static int devfs_create_all_dev_worker(struct devfs_node *); +static int devfs_create_dev_worker(cdev_t, uid_t, gid_t, int); +static int devfs_destroy_dev_worker(cdev_t); +static int devfs_destroy_subnames_worker(char *); +static int devfs_destroy_dev_by_ops_worker(struct dev_ops *, int); +static int devfs_propagate_dev(cdev_t, int); + +static int 
devfs_chandler_add_worker(char *, d_clone_t *); +static int devfs_chandler_del_worker(char *); + +static void devfs_msg_autofree_reply(lwkt_port_t, lwkt_msg_t); +static void devfs_msg_core(void *); + +static int devfs_find_device_by_name_worker(devfs_msg_t); +static int devfs_find_device_by_udev_worker(devfs_msg_t); + +static int devfs_apply_reset_rules_caller(char *, int); +static int devfs_apply_reset_rules_worker(struct devfs_node *, int); + +static int devfs_scan_callback_worker(devfs_scan_t *); + +static struct devfs_node *devfs_resolve_or_create_dir(struct devfs_node *, char *, size_t, int); + +static int devfs_make_alias_worker(struct devfs_alias *); +static int devfs_alias_remove(cdev_t); +static int devfs_alias_reap(void); +static int devfs_alias_propagate(struct devfs_alias *); +static int devfs_alias_apply(struct devfs_node *, struct devfs_alias *); +static int devfs_alias_check_create(struct devfs_node *); + +/* + * devfs_debug() is a SYSCTL and TUNABLE controlled debug output function using kvprintf + */ +int +devfs_debug(int level, char *fmt, ...) +{ + __va_list ap; + + __va_start(ap, fmt); + if (level <= devfs_debug_enable) + kvprintf(fmt, ap); + __va_end(ap); + + return 0; +} + +/* + * devfs_allocp() Allocates a new devfs node with the specified parameters. The node is also automatically linked + * into the topology if a parent is specified. 
It also calls the rule and alias stuff to be applied on the new + * node + */ +struct devfs_node * +devfs_allocp(devfs_nodetype devfsnodetype, char *name, struct devfs_node *parent, struct mount *mp, cdev_t dev) +{ + struct devfs_node *node = NULL; + size_t namlen = strlen(name); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocp -1- for %s\n", name?name:"NULL"); + + node = objcache_get(devfs_node_cache, M_WAITOK); + + atomic_add_int(&(DEVFS_MNTDATA(mp)->leak_count), 1); + + node->nchildren = 1; + node->mp = mp; + node->d_dir.d_ino = devfs_fetch_ino(); + node->flags = 0; + node->cookie_jar = 2; /* Leave 0 and 1 for '.' and '..', respectively */ + + /* Access Control members */ + node->mode = DEVFS_DEFAULT_MODE; /* files access mode and type */ + node->uid = DEVFS_DEFAULT_UID; /* owner user id */ + node->gid = DEVFS_DEFAULT_GID; /* owner group id */ + + /* Null the symlink */ + node->symlink_name = NULL; + node->symlink_namelen = 0; + node->link_target = NULL; + + /* Null the count of links to this node */ + node->nlinks = 0; + + switch (devfsnodetype) { + case Proot: + node->flags |= DEVFS_NODE_LINKED; //Make sure we don't recycle the root vnode + case Pdir: + TAILQ_INIT(DEVFS_DENODE_HEAD(node)); + node->d_dir.d_type = DT_DIR; + node->nchildren = 2; + break; + + case Plink: + node->d_dir.d_type = DT_LNK; + break; + + case Preg: + node->d_dir.d_type = DT_REG; + break; + + case Pdev: + if (dev != NULL) { + node->d_dir.d_type = DT_CHR; + node->d_dev = dev; + node->d_dir.d_ino = dev->si_inode; + + node->mode = dev->si_perms; /* files access mode and type */ + node->uid = dev->si_uid; /* owner user id */ + node->gid = dev->si_gid; /* owner group id */ + + devfs_alias_check_create(node); + } + break; + + default: + panic("devfs_allocp: unknown node type"); + } + + node->v_node = NULL; + node->node_type = devfsnodetype; + + /* Init the dirent structure of each devfs vnode */ + node->d_dir.d_namlen = namlen; + memcpy(node->d_dir.d_name, name, namlen); + 
node->d_dir.d_name[namlen] = '\0'; + + /* Initialize the parent node element */ + node->parent = parent; + + /* Apply rules */ + devfs_rule_check_apply(node); + + /* If there is a parent, increment the number of his children and add the new + * child to the parent's list of children */ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocp: about to insert node\n"); + if ((parent != NULL) && + ((parent->node_type == Proot) || (parent->node_type == Pdir))) { + TAILQ_INSERT_TAIL(DEVFS_DENODE_HEAD(parent), node, link); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocp: node inserted\n"); + parent->nchildren++; + node->cookie = parent->cookie_jar++; + node->flags |= DEVFS_NODE_LINKED; + } + + /* xtime members */ + nanotime(&node->atime); + node->mtime = node->ctime = node->atime; + + /* Null out open references to this "file" */ + node->refs = 0; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocp -end:2-\n"); + return node; +} + +/* + * devfs_allocv() allocates a new vnode based on a devfs node. + */ +int +devfs_allocv(struct vnode **vpp, struct devfs_node *node) +{ + struct vnode *vp; + int error = 0; + + KKASSERT(node); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -1-\n"); + +try_again: + while ((vp = node->v_node) != NULL) { + error = vget(vp, LK_EXCLUSIVE); + if (error != ENOENT) { + *vpp = vp; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv, code path 2...\n"); + goto out; + } + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -3-\n"); + + //XXX: afaik getnewvnode doesn't return anything but 0. 
+ + if ((error = getnewvnode(VT_DEVFS, node->mp, vpp, 0, 0)) != 0) + goto out; + + vp = *vpp; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -4-\n"); + + if (node->v_node != NULL) { + vp->v_type = VBAD; + vx_put(vp); + goto try_again; + } + + vp->v_data = node; + node->v_node = vp; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -5-\n"); + + switch (node->node_type) { + case Proot: + vp->v_flag |= VROOT; + case Pdir: + vp->v_type = VDIR; + break; + + case Plink: + vp->v_type = VLNK; + break; + + case Preg: + vp->v_type = VREG; + break; + + case Pdev: + vp->v_type = VCHR; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -6-\n"); + KKASSERT(node->d_dev); + + if (node->d_dev) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -7-\n"); + vp->v_uminor = node->d_dev->si_uminor; + vp->v_umajor = 0; + vp->v_rdev = node->d_dev; + vp->v_ops = &node->mp->mnt_vn_spec_ops; + //v_associate_rdev(vp, node->d_dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -8-\n"); + } else { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv: type is Pdev but d_dev is not set!!!!\n"); + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -9-\n"); + break; + + default: + panic("devfs_allocv: unknown node type"); + } + +out: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -10-\n"); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocv -end:11-\n"); + return error; +} + +/* + * devfs_allocvp allocates both a devfs node (with the given settings) and a vnode + * based on the newly created devfs node. 
+ */ +int +devfs_allocvp(struct mount *mp, struct vnode **vpp, devfs_nodetype devfsnodetype, + char *name, struct devfs_node *parent, cdev_t dev) +{ + struct devfs_node *node; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocvp -1-\n"); + node = devfs_allocp(devfsnodetype, name, parent, mp, dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocvp -2-\n"); + if (node != NULL) + devfs_allocv(vpp, node); + else + *vpp = NULL; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_allocvp -end:3-\n"); + + return 0; +} + +/* + * devfs_freep frees a devfs node *ONLY* if it is the root node or the node is not linked + * into the topology anymore. It also calls the tracer helper to keep track of possible + * orphans. + */ +int +devfs_freep(struct devfs_node *node) +{ + KKASSERT(node); + KKASSERT(((node->flags & DEVFS_NODE_LINKED) == 0) || (node->node_type == Proot)); + + atomic_subtract_int(&(DEVFS_MNTDATA(node->mp)->leak_count), 1); + if (node->symlink_name) { + kfree(node->symlink_name, M_DEVFS); + node->symlink_name = NULL; + } + + if ((node->flags & DEVFS_NO_TRACE) == 0) + devfs_tracer_del_orphan(node); + + //XXX: Add something to make sure that no vnode is associated with this devfs node + objcache_put(devfs_node_cache, node); + + return 0; +} + +/* + * devfs_unlinkp unlinks a devfs node out of the topology and adds the node + * to the orphan list. It is later removed by freep. + * If a vnode is still associated to the devfs node, then the vnode's rdev + * is NULLed. 
+ */ +int +devfs_unlinkp(struct devfs_node *node) +{ + struct devfs_node *parent; + KKASSERT(node); + + devfs_tracer_add_orphan(node); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_unlinkp for %s\n", node->d_dir.d_name); + parent = node->parent; + + /* If the parent is known we can unlink the node out of the topology */ + if (parent) { + TAILQ_REMOVE(DEVFS_DENODE_HEAD(parent), node, link); + parent->nchildren--; + KKASSERT((parent->nchildren >= 0)); + node->flags &= ~DEVFS_NODE_LINKED; + } + node->parent = NULL; + + /* Invalidate vnode as a device node */ + if (node->v_node) + node->v_node->v_rdev = NULL; + + return 0; +} + +/* + * devfs_reaperp() is a recursive function that iterates through all the topology, + * unlinking and freeing all devfs nodes. + */ +int +devfs_reaperp(struct devfs_node *node) +{ + struct devfs_node *node1, *node2; + + //devfs_debug(DEVFS_DEBUG_DEBUG, "This node is called %s\n", node->d_dir.d_name); + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + devfs_reaperp(node1); + } + } + } + //devfs_debug(DEVFS_DEBUG_DEBUG, "This node is called %s and it is being freed\n", node->d_dir.d_name); + devfs_unlinkp(node); + devfs_freep(node); + + return 0; +} + +/* + * devfs_gc() is devfs garbage collector. It takes care of unlinking and freeing a + * node, but also removes empty directories and links that link via devfs auto-link + * mechanism to the node being deleted. + */ +int +devfs_gc(struct devfs_node *node) +{ + struct devfs_node *root_node = DEVFS_MNTDATA(node->mp)->root_node; + + devfs_gc_links(root_node, node, node->nlinks); + devfs_unlinkp(node); + devfs_gc_dirs(root_node); + + devfs_freep(node); + + return 0; +} + +/* + * devfs_gc_dirs() is a helper function for devfs_gc, unlinking and freeing + * empty directories. 
+ */ +static int +devfs_gc_dirs(struct devfs_node *node) +{ + struct devfs_node *node1, *node2; + + //devfs_debug(DEVFS_DEBUG_DEBUG, "This node is called %s\n", node->d_dir.d_name); + + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + devfs_gc_dirs(node1); + } + } + + if (node->nchildren == 2) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is called %s and it is empty\n", node->d_dir.d_name); + devfs_unlinkp(node); + devfs_freep(node); + } + } + + return 0; +} + +/* + * devfs_gc_links() is a helper function for devfs_gc, unlinking and freeing + * eauto-linked nodes linking to the node being deleted. + */ +static int +devfs_gc_links(struct devfs_node *node, struct devfs_node *target, size_t nlinks) +{ + struct devfs_node *node1, *node2; + + if (nlinks > 0) { + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + nlinks = devfs_gc_links(node1, target, nlinks); + } + } + } else if (node->link_target == target) { + nlinks--; + devfs_unlinkp(node); + devfs_freep(node); + } + } + + KKASSERT(nlinks >= 0); + + return nlinks; +} + +/* + * devfs_create_dev() is the asynchronous entry point for device creation. It + * just sends a message with the relevant details to the devfs core. 
+ */ +int +devfs_create_dev(cdev_t dev, uid_t uid, gid_t gid, int perms) +{ + __uint64_t id; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_dev -1-, name: %s (%p)\n", dev->si_name, dev); + id = devfs_msg_send_dev(DEVFS_DEVICE_CREATE, dev, uid, gid, perms); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_dev -end:2- (unique id: %x) / (%p)\n", id, dev); + return 0; +} + +/* + * devfs_destroy_dev() is the asynchronous entry point for device destruction. It + * just sends a message with the relevant details to the devfs core. + */ +int +devfs_destroy_dev(cdev_t dev) +{ + devfs_msg_send_dev(DEVFS_DEVICE_DESTROY, dev, 0, 0, 0); + return 0; +} + +/* + * devfs_mount_add() is the synchronous entry point for adding a new devfs mount. + * It sends a synchronous message with the relevant details to the devfs core. + */ +int +devfs_mount_add(struct devfs_mnt_data *mnt) +{ + devfs_msg_t msg; + + msg = devfs_msg_get(); + msg->m_mnt = mnt; + msg = devfs_msg_send_sync(DEVFS_MOUNT_ADD, msg); + devfs_msg_put(msg); + + return 0; +} + +/* + * devfs_mount_del() is the synchronous entry point for removing a devfs mount. + * It sends a synchronous message with the relevant details to the devfs core. + */ +int +devfs_mount_del(struct devfs_mnt_data *mnt) +{ + devfs_msg_t msg; + + msg = devfs_msg_get(); + msg->m_mnt = mnt; + msg = devfs_msg_send_sync(DEVFS_MOUNT_DEL, msg); + devfs_msg_put(msg); + + return 0; +} + +/* + * devfs_destroy_subnames() is the asynchronous entry point for device destruction + * by subname. It just sends a message with the relevant details to the devfs core. + */ +int +devfs_destroy_subnames(char *name) +{ + devfs_msg_send_generic(DEVFS_DESTROY_SUBNAMES, name); + return 0; +} + +/* + * devfs_create_all_dev is the asynchronous entry point to trigger device node creation. + * It just sends a message with the relevant details to the devfs core. 
+ */ +int +devfs_create_all_dev(struct devfs_node *root) +{ + devfs_msg_send_generic(DEVFS_CREATE_ALL_DEV, root); + return 0; +} + +/* + * devfs_destroy_dev_by_ops is the asynchronous entry point to destroy all devices with + * a specific set of dev_ops and minor. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_destroy_dev_by_ops(struct dev_ops *ops, int minor) +{ + devfs_msg_send_ops(DEVFS_DESTROY_DEV_BY_OPS, ops, minor); + return 0; +} + +/* + * devfs_clone_handler_add is the asynchronous entry point to add a new clone handler. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_clone_handler_add(char *name, d_clone_t *nhandler) +{ + devfs_msg_send_chandler(DEVFS_CHANDLER_ADD, name, nhandler); + return 0; +} + +/* + * devfs_clone_handler_del is the asynchronous entry point to remove a clone handler. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_clone_handler_del(char *name) +{ + devfs_msg_send_chandler(DEVFS_CHANDLER_DEL, name, NULL); + return 0; +} + +/* + * devfs_find_device_by_name is the synchronous entry point to find a device given + * its name. + * It sends a synchronous message with the relevant details to the devfs core and + * returns the answer. + */ +cdev_t +devfs_find_device_by_name(const char *fmt, ...) +{ + cdev_t found = NULL; + devfs_msg_t msg; + char target[PATH_MAX+1]; + __va_list ap; + int i; + + if (fmt == NULL) + return NULL; + + + __va_start(ap, fmt); + i = kvcprintf(fmt, NULL, target, 10, ap); + target[i] = '\0'; + __va_end(ap); + + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name: %s -1-\n", target); + msg = devfs_msg_get(); + msg->m_name = target; + msg = devfs_msg_send_sync(DEVFS_FIND_DEVICE_BY_NAME, msg); + found = msg->m_cdev; + devfs_msg_put(msg); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name found? 
%s -end:2-\n", (found)?"YES":"NO"); + return found; +} + +/* + * devfs_find_device_by_udev is the synchronous entry point to find a device given + * its udev number. + * It sends a synchronous message with the relevant details to the devfs core and + * returns the answer. + */ +cdev_t +devfs_find_device_by_udev(udev_t udev) +{ + cdev_t found = NULL; + devfs_msg_t msg; + + msg = devfs_msg_get(); + msg->m_udev = udev; + msg = devfs_msg_send_sync(DEVFS_FIND_DEVICE_BY_UDEV, msg); + found = msg->m_cdev; + devfs_msg_put(msg); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_udev found? %s -end:3-\n", (found)?found->si_name:"NO"); + return found; +} + +/* + * devfs_make_alias is the asynchronous entry point to register an alias for a device. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_make_alias(char *name, cdev_t dev_target) +{ + struct devfs_alias *alias = kmalloc(sizeof(struct devfs_alias), M_DEVFS, M_WAITOK); + memcpy(alias->name, name, strlen(name) + 1); + alias->dev_target = dev_target; + + devfs_msg_send_generic(DEVFS_MAKE_ALIAS, alias); + return 0; +} + +/* + * devfs_apply_rules is the asynchronous entry point to trigger application of all rules. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_apply_rules(char *mntto) +{ + char *new_name; + size_t namelen; + + namelen = strlen(mntto) + 1; + + new_name = kmalloc(namelen, M_DEVFS, M_WAITOK); + + memcpy(new_name, mntto, namelen); + + devfs_msg_send_name(DEVFS_APPLY_RULES, new_name); + return 0; +} + +/* + * devfs_reset_rules is the asynchronous entry point to trigger reset of all rules. + * It just sends a message with the relevant details to the devfs core. 
+ */ +int +devfs_reset_rules(char *mntto) +{ + char *new_name; + size_t namelen; + + namelen = strlen(mntto) + 1; + + new_name = kmalloc(namelen, M_DEVFS, M_WAITOK); + + memcpy(new_name, mntto, namelen); + + devfs_msg_send_name(DEVFS_RESET_RULES, new_name); + return 0; +} + + +/* + * devfs_scan_callback is the asynchronous entry point to call a callback + * on all cdevs. + * It just sends a message with the relevant details to the devfs core. + */ +int +devfs_scan_callback(devfs_scan_t *callback) +{ + devfs_msg_t msg; + + /* Make sure that function pointers have the size of a generic pointer (innecessary) */ + KKASSERT(sizeof(callback) == sizeof(void *)); + + msg = devfs_msg_get(); + msg->m_load = callback; + msg = devfs_msg_send_sync(DEVFS_SCAN_CALLBACK, msg); + devfs_msg_put(msg); + + return 0; +} + + +/* + * Acts as a message drain. Any message that is replied to here gets destroyed and + * the memory freed. + */ +static void +devfs_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) +{ + devfs_msg_put((devfs_msg_t)msg); +} + +/* + * devfs_msg_get allocates a new devfs msg and returns it. + */ +devfs_msg_t +devfs_msg_get() +{ + return objcache_get(devfs_msg_cache, M_WAITOK); +} + +/* + * devfs_msg_put deallocates a given devfs msg. + */ +int +devfs_msg_put(devfs_msg_t msg) +{ + objcache_put(devfs_msg_cache, msg); + return 0; +} + +/* + * devfs_msg_send is the generic asynchronous message sending facility + * for devfs. By default the reply port is the automatic disposal port. + */ +__uint32_t +devfs_msg_send(uint32_t cmd, devfs_msg_t devfs_msg) +{ + lwkt_port_t port = &devfs_msg_port; + + lwkt_initmsg(&devfs_msg->hdr, &devfs_dispose_port, 0); + + devfs_msg->hdr.u.ms_result = cmd; + devfs_msg->id = atomic_fetchadd_int(&msg_id, 1); + + lwkt_sendmsg(port, (lwkt_msg_t)devfs_msg); + + return devfs_msg->id; +} + +/* + * devfs_msg_send_sync is the generic synchronous message sending + * facility for devfs. 
It initializes a local reply port and waits + * for the core's answer. This answer is then returned. + */ +devfs_msg_t +devfs_msg_send_sync(uint32_t cmd, devfs_msg_t devfs_msg) +{ + struct lwkt_port rep_port; + devfs_msg_t msg_incoming; + lwkt_port_t port = &devfs_msg_port; + + lwkt_initport_thread(&rep_port, curthread); + lwkt_initmsg(&devfs_msg->hdr, &rep_port, 0); + + devfs_msg->hdr.u.ms_result = cmd; + devfs_msg->id = atomic_fetchadd_int(&msg_id, 1); + + lwkt_sendmsg(port, (lwkt_msg_t)devfs_msg); + msg_incoming = lwkt_waitport(&rep_port, 0); + + return msg_incoming; +} + +/* + * sends a message with a generic argument. + */ +__uint32_t +devfs_msg_send_generic(uint32_t cmd, void *load) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_load = load; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_send_generic -1- (%p)\n", load); + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with a name argument. + */ +__uint32_t +devfs_msg_send_name(uint32_t cmd, char *name) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_name = name; + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with a mount argument. + */ +__uint32_t +devfs_msg_send_mount(uint32_t cmd, struct devfs_mnt_data *mnt) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_mnt = mnt; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_send_mp -1- (%p)\n", mnt); + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with an ops argument. + */ +__uint32_t +devfs_msg_send_ops(uint32_t cmd, struct dev_ops *ops, int minor) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_ops.ops = ops; + devfs_msg->m_ops.minor = minor; + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with a clone handler argument. 
+ */ +__uint32_t +devfs_msg_send_chandler(uint32_t cmd, char *name, d_clone_t handler) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_chandler.name = name; + devfs_msg->m_chandler.nhandler = handler; + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with a device argument. + */ +__uint32_t +devfs_msg_send_dev(uint32_t cmd, cdev_t dev, uid_t uid, gid_t gid, int perms) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_dev.dev = dev; + devfs_msg->m_dev.uid = uid; + devfs_msg->m_dev.gid = gid; + devfs_msg->m_dev.perms = perms; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_send_dev -1- (%p)\n", dev); + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * sends a message with a link argument. + */ +//XXX: dead code! +__uint32_t +devfs_msg_send_link(uint32_t cmd, char *name, char *target, struct mount *mp) +{ + devfs_msg_t devfs_msg = devfs_msg_get(); + devfs_msg->m_link.name = name; + devfs_msg->m_link.target = target; + devfs_msg->m_link.mp = mp; + + + return devfs_msg_send(cmd, devfs_msg); +} + +/* + * devfs_msg_core is the main devfs thread. It handles all incoming messages + * and calls the relevant worker functions. By using messages it's assured + * that events occur in the correct order. 
+ */ +static void +devfs_msg_core(void *arg) +{ + uint8_t run = 1; + devfs_msg_t msg; + cdev_t dev; + struct devfs_mnt_data *mnt; + struct devfs_node *node; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core -1-\n"); + lwkt_initport_thread(&devfs_msg_port, curthread); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core -2-\n"); + wakeup(td_core/*devfs_id*/); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core -3-\n"); + + while (run) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core -loop:4-\n"); + msg = (devfs_msg_t)lwkt_waitport(&devfs_msg_port, 0); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core, new msg: %x (unique id: %x)\n", (unsigned int)msg->hdr.u.ms_result, msg->id); + lockmgr(&devfs_lock, LK_EXCLUSIVE); + switch (msg->hdr.u.ms_result) { + + case DEVFS_DEVICE_CREATE: + dev = msg->m_dev.dev; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core device create msg %s (%p)\n", dev->si_name, dev); + devfs_create_dev_worker(dev, msg->m_dev.uid, msg->m_dev.gid, msg->m_dev.perms); + break; + + case DEVFS_DEVICE_DESTROY: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core device destroy msg\n"); + dev = msg->m_dev.dev; + devfs_destroy_dev_worker(dev); + break; + + case DEVFS_DESTROY_SUBNAMES: + devfs_destroy_subnames_worker(msg->m_load); + break; + + case DEVFS_DESTROY_DEV_BY_OPS: + devfs_destroy_dev_by_ops_worker(msg->m_ops.ops, msg->m_ops.minor); + break; + + case DEVFS_CREATE_ALL_DEV: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core device create ALL msg\n"); + node = (struct devfs_node *)msg->m_load; + devfs_create_all_dev_worker(node); + break; + + case DEVFS_MOUNT_ADD: + mnt = msg->m_mnt; + TAILQ_INSERT_TAIL(&devfs_mnt_list, mnt, link); + devfs_create_all_dev_worker(mnt->root_node); + break; + + case DEVFS_MOUNT_DEL: + mnt = msg->m_mnt; + TAILQ_REMOVE(&devfs_mnt_list, mnt, link); + devfs_debug(DEVFS_DEBUG_DEBUG, "There are still %d devfs_node elements!!!\n", mnt->leak_count); + devfs_reaperp(mnt->root_node); + devfs_debug(DEVFS_DEBUG_DEBUG, "Leaked %d devfs_node 
elements!!!\n", mnt->leak_count); + break; + + case DEVFS_CHANDLER_ADD: + devfs_chandler_add_worker(msg->m_chandler.name, msg->m_chandler.nhandler); + break; + + case DEVFS_CHANDLER_DEL: + devfs_chandler_del_worker(msg->m_chandler.name); + break; + + case DEVFS_FIND_DEVICE_BY_NAME: + devfs_find_device_by_name_worker(msg); + break; + + case DEVFS_FIND_DEVICE_BY_UDEV: + devfs_find_device_by_udev_worker(msg); + break; + + case DEVFS_MAKE_ALIAS: + devfs_make_alias_worker((struct devfs_alias *)msg->m_load); + break; + + case DEVFS_APPLY_RULES: + devfs_apply_reset_rules_caller(msg->m_name, 1); + break; + + case DEVFS_RESET_RULES: + devfs_apply_reset_rules_caller(msg->m_name, 0); + break; + + case DEVFS_SCAN_CALLBACK: + devfs_scan_callback_worker((devfs_scan_t *)msg->m_load); + break; + + case DEVFS_TERMINATE_CORE: + run = 0; + break; + + case DEVFS_SYNC: + break; + + default: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_msg_core: unknown message received at core\n"); + } + lockmgr(&devfs_lock, LK_RELEASE); + + lwkt_replymsg((lwkt_msg_t)msg, 0); + } + wakeup(td_core/*devfs_id*/); + lwkt_exit(); +} + +/* + * Worker function to insert a new dev into the dev list and initialize its + * permissions. It also calls devfs_propagate_dev which in turn propagates + * the change to all mount points. + */ +static int +devfs_create_dev_worker(cdev_t dev, uid_t uid, gid_t gid, int perms) +{ + KKASSERT(dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_dev_worker -1- -%s- (%p)\n", dev->si_name, dev); + + dev->si_uid = uid; + dev->si_gid = gid; + dev->si_perms = perms; + + devfs_link_dev(dev); + reference_dev(dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_dev_worker -2-\n"); + devfs_propagate_dev(dev, 1); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_dev_worker -end:3-\n"); + return 0; +} + +/* + * Worker function to delete a dev from the dev list and free the cdev. + * It also calls devfs_propagate_dev which in turn propagates the change + * to all mount points. 
+ */ +static int +devfs_destroy_dev_worker(cdev_t dev) +{ + KKASSERT(dev); + KKASSERT((lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_dev_worker -1- %s\n", dev->si_name); + devfs_unlink_dev(dev); + devfs_propagate_dev(dev, 0); + release_dev(dev); + release_dev(dev); + //objcache_put(devfs_dev_cache, dev); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_dev_worker -end:5-\n"); + return 0; +} + +/* + * Worker function to destroy all devices with a certain basename. + * Calls devfs_destroy_dev_worker for the actual destruction. + */ +static int +devfs_destroy_subnames_worker(char *name) +{ + cdev_t dev, dev1; + //cdev_t found = NULL; + size_t len = strlen(name); + + TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) { + if (!strncmp(dev->si_name, name, len)) { + if (dev->si_name[len] != '\0') + devfs_destroy_dev_worker(dev); + } + } + + return 0; +} + +/* + * Worker function that creates all device nodes on top of a devfs + * root node. + */ +static int +devfs_create_all_dev_worker(struct devfs_node *root) +{ + cdev_t dev; + + KKASSERT(root); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_all_dev_worker -1-\n"); + + TAILQ_FOREACH(dev, &devfs_dev_list, link) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_all_dev_worker -loop:2- -%s-\n", dev->si_name); + devfs_create_device_node(root, dev, NULL, NULL); + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_all_dev_worker -end:3-\n"); + return 0; +} + +/* + * Worker function that destroys all devices that match a specific + * dev_ops and/or minor. If minor is less than 0, it is not matched + * against. It also propagates all changes. 
+ */ +static int +devfs_destroy_dev_by_ops_worker(struct dev_ops *ops, int minor) +{ + cdev_t dev, dev1; + + KKASSERT(ops); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_dev_by_ops_worker -1-\n"); + + TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) { + if (dev->si_ops == ops) { + if ((minor < 0) || (dev->si_uminor == minor)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_dev_by_ops_worker -loop:2- -%s-\n", dev->si_name); + //TAILQ_REMOVE(&devfs_dev_list, dev, link); + devfs_unlink_dev(dev); + devfs_propagate_dev(dev, 0); + release_dev(dev); + //objcache_put(devfs_dev_cache, dev); + } + } + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_dev_by_ops_worker -end:3-\n"); + return 0; +} + +/* + * Worker function that registers a new clone handler in devfs. + */ +static int +devfs_chandler_add_worker(char *name, d_clone_t *nhandler) +{ + struct devfs_clone_handler *chandler = NULL; + u_char len = strlen(name); + + if (!len) + return 1; + + TAILQ_FOREACH(chandler, &devfs_chandler_list, link) { + if (chandler->namlen == len) { + if (!memcmp(chandler->name, name, len)) { + /* Clonable basename already exists */ + return 1; + } + } + } + + chandler = kmalloc(sizeof(struct devfs_clone_handler), M_DEVFS, M_WAITOK); + memcpy(chandler->name, name, len+1); + chandler->namlen = len; + chandler->nhandler = nhandler; + + TAILQ_INSERT_TAIL(&devfs_chandler_list, chandler, link); + return 0; +} + +/* + * Worker function that removes a given clone handler from the + * clone handler list. 
+ */ +static int +devfs_chandler_del_worker(char *name) +{ + struct devfs_clone_handler *chandler, *chandler2; + u_char len = strlen(name); + + if (!len) + return 1; + + TAILQ_FOREACH_MUTABLE(chandler, &devfs_chandler_list, link, chandler2) { + if (chandler->namlen == len) { + if (!memcmp(chandler->name, name, len)) { + TAILQ_REMOVE(&devfs_chandler_list, chandler, link); + kfree(chandler, M_DEVFS); + //break; + } + } + } + + return 0; +} + +/* + * Worker function that finds a given device name and changes + * the message received accordingly so that when replied to, + * the answer is returned to the caller. + */ +static int +devfs_find_device_by_name_worker(devfs_msg_t devfs_msg) +{ + cdev_t dev, dev1; + cdev_t found = NULL; + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name: %s -1-\n", target); + + TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) { + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name -loop:2- -%s-\n", dev->si_name); + if (!strcmp(devfs_msg->m_name, dev->si_name)) { + found = dev; + break; + } + } + devfs_msg->m_cdev = found; + + return 0; +} + +/* + * Worker function that finds a given device udev and changes + * the message received accordingly so that when replied to, + * the answer is returned to the caller. + */ +static int +devfs_find_device_by_udev_worker(devfs_msg_t devfs_msg) +{ + cdev_t dev, dev1; + cdev_t found = NULL; + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name: %s -1-\n", target); + + TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) { + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_find_device_by_name -loop:2- -%s-\n", dev->si_name); + if (((udev_t)dev->si_inode) == devfs_msg->m_udev) { + found = dev; + break; + } + } + devfs_msg->m_cdev = found; + + return 0; +} + +/* + * Worker function that inserts a given alias into the + * alias list, and propagates the alias to all mount + * points. 
+ */ +static int +devfs_make_alias_worker(struct devfs_alias *alias) +{ + struct devfs_alias *alias2; + size_t len = strlen(alias->name); + int found = 0; + + TAILQ_FOREACH(alias2, &devfs_alias_list, link) { + if (!memcmp(alias->name, alias2->name, len)) { + found = 1; + break; + } + } + + if (!found) { + TAILQ_INSERT_TAIL(&devfs_alias_list, alias, link); + devfs_alias_propagate(alias); + } else { + devfs_debug(DEVFS_DEBUG_DEBUG, "Warning: duplicate devfs_make_alias for %s\n", alias->name); + kfree(alias, M_DEVFS); + } + + return 0; +} + +/* + * Function that removes and frees all aliases. + */ +static int +devfs_alias_reap(void) +{ + struct devfs_alias *alias, *alias2; + + TAILQ_FOREACH_MUTABLE(alias, &devfs_alias_list, link, alias2) { + TAILQ_REMOVE(&devfs_alias_list, alias, link); + kfree(alias, M_DEVFS); + } + return 0; +} + +/* + * Function that removes an alias matching a specific cdev and frees + * it accordingly. + */ +static int +devfs_alias_remove(cdev_t dev) +{ + struct devfs_alias *alias, *alias2; + + TAILQ_FOREACH_MUTABLE(alias, &devfs_alias_list, link, alias2) { + if (alias->dev_target == dev) { + TAILQ_REMOVE(&devfs_alias_list, alias, link); + kfree(alias, M_DEVFS); + } + } + return 0; +} + +/* + * This function propagates a new alias to all mount points. + */ +static int +devfs_alias_propagate(struct devfs_alias *alias) +{ + struct devfs_mnt_data *mnt; + + TAILQ_FOREACH(mnt, &devfs_mnt_list, link) { + devfs_alias_apply(mnt->root_node, alias); + } + return 0; +} + +/* + * This function is a recursive function iterating through + * all device nodes in the topology and, if applicable, + * creating the relevant alias for a device node. 
+ */ +static int +devfs_alias_apply(struct devfs_node *node, struct devfs_alias *alias) +{ + struct devfs_node *node1, *node2; + + KKASSERT(alias != NULL); + + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + devfs_alias_apply(node1, alias); + } + } + } else { + if (node->d_dev == alias->dev_target) + devfs_alias_create(alias->name, node); + } + return 0; +} + +/* + * This function checks if any alias possibly is applicable + * to the given node. If so, the alias is created. + */ +static int +devfs_alias_check_create(struct devfs_node *node) +{ + struct devfs_alias *alias; + + TAILQ_FOREACH(alias, &devfs_alias_list, link) { + if (node->d_dev == alias->dev_target) + devfs_alias_create(alias->name, node); + } + return 0; +} + +/* + * This function creates an alias with a given name + * linking to a given devfs node. It also increments + * the link count on the target node. + */ +int +devfs_alias_create(char *name_orig, struct devfs_node *target) +{ + struct mount *mp = target->mp; + struct devfs_node *parent = DEVFS_MNTDATA(mp)->root_node; + struct devfs_node *linknode; + + //char *path = NULL; + char *create_path = NULL; + char *name, name_buf[PATH_MAX]; + + //XXX: possibly put this in many worker functions (at least those with ext. 
API) + KKASSERT((lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE); + + devfs_resolve_name_path(name_orig, name_buf, &create_path, &name); + + if (create_path) + parent = devfs_resolve_or_create_path(parent, create_path, 1); + + + if (devfs_find_device_node_by_name(parent, name)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "Node already exists: %s (devfs_make_alias_worker)!\n", name); + return 1; + } + + + linknode = devfs_allocp(Plink, name, parent, mp, NULL); + if (linknode == NULL) + return 1; + + linknode->link_target = target; + target->nlinks++; + //linknode->flags |= DEVFS_LINK; + + return 0; +} + +/* + * This function is called by the core and handles mount point + * strings. It either calls the relevant worker (devfs_apply_ + * reset_rules_worker) on all mountpoints or only a specific + * one. + */ +static int +devfs_apply_reset_rules_caller(char *mountto, int apply) +{ + //int found = 0; + struct devfs_mnt_data *mnt; + size_t len = strlen(mountto); + + if (mountto[0] != '*') { + TAILQ_FOREACH(mnt, &devfs_mnt_list, link) { + if ((len == mnt->mntonnamelen) && + (!memcmp(mnt->mp->mnt_stat.f_mntonname, mountto, len))) { + devfs_apply_reset_rules_worker(mnt->root_node, apply); + break; + } + } + } else { + TAILQ_FOREACH(mnt, &devfs_mnt_list, link) { + devfs_apply_reset_rules_worker(mnt->root_node, apply); + } + } + + kfree(mountto, M_DEVFS); + return 0; +} + +/* + * This worker function applies or resets, depending on the arguments, a rule + * to the whole given topology. 
*RECURSIVE* + */ +static int +devfs_apply_reset_rules_worker(struct devfs_node *node, int apply) +{ + struct devfs_node *node1, *node2; + + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + devfs_apply_reset_rules_worker(node1, apply); + } + } + } + + if (apply) + devfs_rule_check_apply(node); + else + devfs_rule_reset_node(node); + + return 0; +} + + +/* + * This function calls a given callback function for + * every dev node in the devfs dev list. + */ +static int +devfs_scan_callback_worker(devfs_scan_t *callback) +{ + cdev_t dev, dev1; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_scan_callback: %p -1-\n", callback); + + TAILQ_FOREACH_MUTABLE(dev, &devfs_dev_list, link, dev1) { + callback(dev); + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_scan_callback: finished\n"); + return 0; +} + + +/* + * This function tries to resolve a given directory, or if not + * found and creation requested, creates the given directory. + */ +static struct devfs_node * +devfs_resolve_or_create_dir(struct devfs_node *parent, char *dir_name, size_t name_len, int create) +{ + struct devfs_node *node, *found = NULL; + + TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(parent), link) { + if (name_len == node->d_dir.d_namlen) { + if (!memcmp(dir_name, node->d_dir.d_name, name_len)) { + found = node; + break; + } + } + } + + if ((found == NULL) && (create)) { + found = devfs_allocp(Pdir, dir_name, parent, parent->mp, NULL); + } + + return found; +} + +/* + * This function tries to resolve a complete path. If creation is requested, + * if a given part of the path cannot be resolved (because it doesn't exist), + * it is created. 
+ */ +struct devfs_node * +devfs_resolve_or_create_path(struct devfs_node *parent, char *path, int create) +{ + struct devfs_node *node = parent; + char buf[PATH_MAX]; + size_t idx = 0; + + + if (path == NULL) + return parent; + + + for (; *path != '\0' ; path++) { + if (*path != '/') { + buf[idx++] = *path; + } else { + buf[idx] = '\0'; + node = devfs_resolve_or_create_dir(node, buf, idx, create); + if (node == NULL) + return NULL; + idx = 0; + } + } + buf[idx] = '\0'; + return devfs_resolve_or_create_dir(node, buf, idx, create); +} + +/* + * Takes a full path and strips it into a directory path and a name. + * For a/b/c/foo, it returns foo in namep and a/b/c in pathp. It + * requires a working buffer with enough size to keep the whole + * fullpath. + */ +int +devfs_resolve_name_path(char *fullpath, char *buf, char **pathp, char **namep) +{ + char *name = NULL; + char *path = NULL; + size_t len = strlen(fullpath) + 1; + int i; + + KKASSERT((fullpath != NULL) && (buf != NULL) && (pathp != NULL) && (namep != NULL)); + + memcpy(buf, fullpath, len); + + for (i = len-1; i>= 0; i--) { + if (buf[i] == '/') { + buf[i] = '\0'; + name = &(buf[i+1]); + path = buf; + break; + } + } + + *pathp = path; + + if (name) { + *namep = name; + } else { + *namep = buf; + } + + return 0; +} + +/* + * This function creates a new devfs node for a given device. It can + * handle a complete path as device name, and accordingly creates + * the path and the final device node. + */ +struct devfs_node * +devfs_create_device_node(struct devfs_node *root, cdev_t dev, char *dev_name, char *path_fmt, ...) 
+{ + struct devfs_node *parent, *node = NULL; + char *path = NULL; + char *name, name_buf[PATH_MAX]; + __va_list ap; + int i, found; + + char *create_path = NULL; + char *names = "pqrsPQRS"; + + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_device_node : -%s- (%p)\n", dev->si_name, dev); + + if (path_fmt != NULL) { + path = kmalloc(PATH_MAX+1, M_DEVFS, M_WAITOK); + + __va_start(ap, path_fmt); + i = kvcprintf(path_fmt, NULL, path, 10, ap); + path[i] = '\0'; + __va_end(ap); + } + + parent = devfs_resolve_or_create_path(root, path, 1); + KKASSERT(parent); + + if (dev) + reference_dev(dev); + + devfs_resolve_name_path(((dev_name == NULL) && (dev))?(dev->si_name):(dev_name), name_buf, &create_path, &name); + + if (create_path) + parent = devfs_resolve_or_create_path(parent, create_path, 1); + + + if (devfs_find_device_node_by_name(parent, name)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_device_node: DEVICE %s ALREADY EXISTS!!! Ignoring creation request.\n", name); + goto out; + } + devfs_debug(DEVFS_DEBUG_DEBUG, "parent->d_dir.d_name=%s\n", parent->d_dir.d_name); + node = devfs_allocp(Pdev, name, parent, parent->mp, dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "node->d_dir.d_name=%s\n", node->d_dir.d_name); + + /* Ugly unix98 pty magic, to hide pty master (ptm) devices and their directory */ + if ((dev) && (strlen(dev->si_name) >= 4) && (!memcmp(dev->si_name, "ptm/", 4))) { + //node->parent->flags |= DEVFS_HIDDEN; + //node->flags |= DEVFS_HIDDEN; + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_device_node: marker A\n"); + /* Ugly pty magic, to tag pty devices as such and hide them if needed */ + if ((strlen(name) >= 3) && (!memcmp(name, "pty", 3))) + node->flags |= (DEVFS_PTY | DEVFS_INVISIBLE); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_device_node: marker B\n"); + if ((strlen(name) >= 3) && (!memcmp(name, "tty", 3))) { + found = 0; + for (i = 0; i < strlen(names); i++) { + if (name[3] == names[i]) { + found = 1; + break; + } + } + if (found) + node->flags 
|= (DEVFS_PTY | DEVFS_INVISIBLE); + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_create_device_node: marker C\n"); + +out: + if (path_fmt != NULL) + kfree(path, M_DEVFS); + if (dev) + release_dev(dev); + + return node; +} + +/* + * This function finds a given device node in the topology with a given + * cdev. + */ +struct devfs_node * +devfs_find_device_node(struct devfs_node *node, cdev_t target) +{ + struct devfs_node *node1, *node2, *found = NULL; + + if ((node->node_type == Proot) || (node->node_type == Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "This node is Pdir or Proot; has %d children\n", node->nchildren); + if (node->nchildren > 2) { + TAILQ_FOREACH_MUTABLE(node1, DEVFS_DENODE_HEAD(node), link, node2) { + if ((found = devfs_find_device_node(node1, target))) + return found; + } + } + } else if (node->node_type == Pdev) { + if (node->d_dev == target) + return node; + } + //devfs_debug(DEVFS_DEBUG_DEBUG, "This node is called %s\n", (found)?found->d_dir.d_name:"NOTFOUND"); + + return NULL; +} + +/* + * This function finds a device node in the topology by its + * name and returns it. + */ +struct devfs_node * +devfs_find_device_node_by_name(struct devfs_node *parent, char *target) +{ + struct devfs_node *node, *found = NULL; + size_t len = strlen(target); + + TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(parent), link) { + if ((len == node->d_dir.d_namlen) && (!memcmp(node->d_dir.d_name, target, len))) { + found = node; + break; + } + } + + return found; +} + +/* + * This function takes a cdev and destroys its devfs node in the + * given topology. 
+ */ +int +devfs_destroy_device_node(struct devfs_node *root, cdev_t target) +{ + struct devfs_node *node, *parent; + + char *name, name_buf[PATH_MAX]; + //__va_list ap; + //int i; + + char *create_path = NULL; + + KKASSERT(target); + + + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_destroy_device_node\n"); + memcpy(name_buf, target->si_name, strlen(target->si_name)+1); + + devfs_resolve_name_path(target->si_name, name_buf, &create_path, &name); + devfs_debug(DEVFS_DEBUG_DEBUG, "create_path: %s\n", create_path); + devfs_debug(DEVFS_DEBUG_DEBUG, "name: %s\n", name); + + if (create_path) + parent = devfs_resolve_or_create_path(root, create_path, 0); + else + parent = root; + devfs_debug(DEVFS_DEBUG_DEBUG, "-> marker <-\n"); + if (parent == NULL) + return 1; + devfs_debug(DEVFS_DEBUG_DEBUG, "->d_dir.d_name=%s\n", parent->d_dir.d_name); + node = devfs_find_device_node_by_name(parent, name); + devfs_debug(DEVFS_DEBUG_DEBUG, "->d_dir.d_name=%s\n", (node)?(node->d_dir.d_name):"SHIT!"); + if (node) { + devfs_gc(node); + } + + return 0; +} + +/* + * Just set perms and ownership for given node. + */ +int +devfs_set_perms(struct devfs_node *node, uid_t uid, gid_t gid, u_short mode, u_long flags) +{ + node->mode = mode; /* files access mode and type */ + node->uid = uid; /* owner user id */ + node->gid = gid; /* owner group id */ + //node->flags = flags; + + return 0; +} + +/* + * Propagates a device attach/detach to all mount + * points. Also takes care of automatic alias removal + * for a deleted cdev. + */ +static int +devfs_propagate_dev(cdev_t dev, int attach) +{ + struct devfs_mnt_data *mnt; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_propagate_dev -1-\n"); + TAILQ_FOREACH(mnt, &devfs_mnt_list, link) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_propagate_dev -loop:2-\n"); + if (attach) { + /* Device is being attached */ + //devfs_create_device_node(struct devfs_node *root, struct devfs_dev *dev, char *dev_name, char *path_fmt, ...) 
+ devfs_create_device_node(mnt->root_node, dev, NULL, NULL ); + } else { + /* Device is being detached */ + //devfs_destroy_device_node(struct devfs_node *root, struct devfs_dev *target) + devfs_alias_remove(dev); + devfs_destroy_device_node(mnt->root_node, dev); + } + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_propagate_dev -end:3-\n"); + return 0; +} + +/* + * devfs_node_to_path takes a node and a buffer of a size of + * at least PATH_MAX, resolves the full path from the root + * node and writes it in a humanly-readable format into the + * buffer. + * If DEVFS_STASH_DEPTH is less than the directory level up + * to the root node, only the last DEVFS_STASH_DEPTH levels + * of the path are resolved. + */ +int +devfs_node_to_path(struct devfs_node *node, char *buffer) +{ +#define DEVFS_STASH_DEPTH 32 + struct devfs_node *node_stash[DEVFS_STASH_DEPTH]; + int i, offset; + memset(buffer, 0, PATH_MAX); + + for (i = 0; (i < DEVFS_STASH_DEPTH) && (node->node_type != Proot); i++) { + node_stash[i] = node; + node = node->parent; + } + i--; + + for (offset = 0; i >= 0; i--) { + memcpy(buffer+offset, node_stash[i]->d_dir.d_name, node_stash[i]->d_dir.d_namlen); + offset += node_stash[i]->d_dir.d_namlen; + if (i > 0) { + *(buffer+offset) = '/'; + offset++; + } + } +#undef DEVFS_STASH_DEPTH + return 0; +} + +/* + * devfs_clone either returns a basename from a complete name by + * returning the length of the name without trailing digits, or, + * if clone != 0, calls the device's clone handler to get a new + * device, which in turn is returned in devp. 
+ */ +int +devfs_clone(char *name, size_t *namlenp, cdev_t *devp, int clone, struct ucred *cred) +{ + KKASSERT(namlenp); + + size_t len = *namlenp; + int error = 1; + struct devfs_clone_handler *chandler; + struct dev_clone_args ap; + + if (!clone) { + for (; (len > 0) && (DEVFS_ISDIGIT(name[len-1])); len--); + } + + TAILQ_FOREACH(chandler, &devfs_chandler_list, link) { + devfs_debug(DEVFS_DEBUG_DEBUG, "len=%d, chandler->namlen=%d\n", len, chandler->namlen); + devfs_debug(DEVFS_DEBUG_DEBUG, "name=%s, chandler->name=%s\n", name, chandler->name); + if ((chandler->namlen == len) && + (!memcmp(chandler->name, name, len)) && + (chandler->nhandler)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nclone: found clone handler for the base name at %p\n", chandler->nhandler); + if (clone) { + ap.a_dev = NULL; + ap.a_name = name; + ap.a_namelen = len; + ap.a_cred = cred; + error = (chandler->nhandler)(&ap); + KKASSERT(devp); + *devp = ap.a_dev; + } else { + *namlenp = len; + error = 0; + } + + break; + } + } + + return error; +} + + +/* + * Registers a new orphan in the orphan list. + */ +void +devfs_tracer_add_orphan(struct devfs_node *node) +{ + struct devfs_orphan *orphan; + + KKASSERT(node); + orphan = kmalloc(sizeof(struct devfs_orphan), M_DEVFS, M_WAITOK); + orphan->node = node; + + TAILQ_INSERT_TAIL(DEVFS_ORPHANLIST(node->mp), orphan, link); +} + +/* + * Removes an orphan from the orphan list. + */ +void +devfs_tracer_del_orphan(struct devfs_node *node) +{ + struct devfs_orphan *orphan; + + KKASSERT(node); + + TAILQ_FOREACH(orphan, DEVFS_ORPHANLIST(node->mp), link) { + if (orphan->node == node) { + TAILQ_REMOVE(DEVFS_ORPHANLIST(node->mp), orphan, link); + kfree(orphan, M_DEVFS); + break; + } + } +} + +/* + * Counts the orphans in the orphan list, and if cleanup + * is specified, also frees the orphan and removes it from + * the list. 
+ */ +size_t +devfs_tracer_orphan_count(struct mount *mp, int cleanup) +{ + struct devfs_orphan *orphan, *orphan2; + size_t count = 0; + + TAILQ_FOREACH_MUTABLE(orphan, DEVFS_ORPHANLIST(mp), link, orphan2) { + count++; + if (cleanup) { + orphan->node->flags |= DEVFS_NO_TRACE; + devfs_freep(orphan->node); + TAILQ_REMOVE(DEVFS_ORPHANLIST(mp), orphan, link); + kfree(orphan, M_DEVFS); + } + } + + return count; +} + +/* + * Fetch an ino_t from the global d_ino by increasing it + * while spinlocked. + */ +static ino_t +devfs_fetch_ino(void) +{ + ino_t ret; + + spin_lock_wr(&ino_lock); + ret = d_ino++; + spin_unlock_wr(&ino_lock); + + return ret; +} + +/* + * Allocates a new cdev and initializes it's most basic + * fields. + */ +cdev_t +devfs_new_cdev(struct dev_ops *ops, int minor) +{ +// cdev_t dev = objcache_get(devfs_dev_cache, M_WAITOK); +// memset(dev, 0, sizeof(struct cdev)); + + cdev_t dev = sysref_alloc(&cdev_sysref_class); + sysref_activate(&dev->si_sysref); + reference_dev(dev); + devfs_debug(DEVFS_DEBUG_DEBUG, "new_cdev: clearing first %d bytes\n", offsetof(struct cdev, si_sysref)); + memset(dev, 0, offsetof(struct cdev, si_sysref)); + + dev->si_uid = 0; + dev->si_gid = 0; + dev->si_perms = 0; + dev->si_drv1 = NULL; + dev->si_drv2 = NULL; + dev->si_lastread = 0; /* time_second */ + dev->si_lastwrite = 0; /* time_second */ + + dev->si_ops = ops; + dev->si_flags = SI_HASHED | SI_ADHOC; //XXX: any real use? + dev->si_umajor = 0; + dev->si_uminor = minor; + dev->si_inode = devfs_fetch_ino(); + + return dev; +} + + +static void devfs_cdev_terminate(cdev_t dev) +{ + int locked = 0; + + /* Check if it is locked already. 
if not, we acquire the devfs lock */ + if (!(lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE) { + lockmgr(&devfs_lock, LK_EXCLUSIVE); + locked = 1; + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_cdev_terminate: Taking care of dev->si_name=%s\n", dev->si_name); + + /* Propagate destruction, just in case */ + devfs_propagate_dev(dev, 0); + + /* If we acquired the lock, we also get rid of it */ + if (locked) + lockmgr(&devfs_lock, LK_RELEASE); + + /* Finally destroy the device */ + sysref_put(&dev->si_sysref); +} + +/* + * Frees a given cdev + */ +int +devfs_destroy_cdev(cdev_t dev) +{ + release_dev(dev); + //objcache_put(devfs_dev_cache, dev); + return 0; +} + +/* + * Links a given cdev into the dev list. + */ +int +devfs_link_dev(cdev_t dev) +{ + dev->si_flags |= SI_DEVFS_LINKED; + TAILQ_INSERT_TAIL(&devfs_dev_list, dev, link); + + return 0; +} + +/* + * Removes a given cdev from the dev list. + */ +int +devfs_unlink_dev(cdev_t dev) +{ + if ((dev->si_flags & SI_DEVFS_LINKED)) { + TAILQ_REMOVE(&devfs_dev_list, dev, link); + dev->si_flags &= ~SI_DEVFS_LINKED; + } + + return 0; +} + +void +devfs_config(void *arg) +{ + devfs_msg_t msg; + + msg = devfs_msg_get(); + + kprintf("devfs_config: sync'ing up\n"); + msg = devfs_msg_send_sync(DEVFS_SYNC, msg); + devfs_msg_put(msg); +} + +/* + * Called on init of devfs; creates the objcaches and + * spawns off the devfs core thread. Also initializes + * locks. 
+ */ +static void +devfs_init(void) +{ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_init() called\n"); + /* Create objcaches for nodes, msgs and devs */ + devfs_node_cache = objcache_create("devfs-node-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &devfs_node_malloc_args ); + + devfs_msg_cache = objcache_create("devfs-msg-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &devfs_msg_malloc_args ); + + devfs_dev_cache = objcache_create("devfs-dev-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &devfs_dev_malloc_args ); + + /* Initialize the reply-only port which acts as a message drain */ + lwkt_initport_replyonly(&devfs_dispose_port, devfs_msg_autofree_reply); + + /* Initialize *THE* devfs lock */ + lockinit(&devfs_lock, "devfs_core lock", 0, 0); + + + lwkt_create(devfs_msg_core, /*args*/NULL, &td_core, NULL, + 0, 0, "devfs_msg_core"); + + tsleep(td_core/*devfs_id*/, 0, "devfsc", 0); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_init finished\n"); +} + +/* + * Called on unload of devfs; takes care of destroying the core + * and the objcaches. Also removes aliases that are no longer needed. + */ +static void +devfs_uninit(void) +{ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_uninit() called\n"); + + devfs_msg_send(DEVFS_TERMINATE_CORE, NULL); + + tsleep(td_core/*devfs_id*/, 0, "devfsc", 0); + tsleep(td_core/*devfs_id*/, 0, "devfsc", 10000); + + /* Destroy the objcaches */ + objcache_destroy(devfs_msg_cache); + objcache_destroy(devfs_node_cache); + objcache_destroy(devfs_dev_cache); + + devfs_alias_reap(); +} + +/* + * This is a sysctl handler to assist userland devname(3) to + * find the device name for a given udev. 
+ */ +static int +devfs_sysctl_devname_helper(SYSCTL_HANDLER_ARGS) +{ + udev_t udev; + cdev_t found; + int error; + + + if ((error = SYSCTL_IN(req, &udev, sizeof(udev_t)))) + return (error); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs sysctl, received udev: %d\n", udev); + + if (udev == NOUDEV) + return(EINVAL); + + if ((found = devfs_find_device_by_udev(udev)) == NULL) + return(ENOENT); + + return(SYSCTL_OUT(req, found->si_name, strlen(found->si_name) + 1)); +} + + +SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY, + NULL, 0, devfs_sysctl_devname_helper, "", "helper for devname(3)"); + +static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "devfs"); +TUNABLE_INT("vfs.devfs.debug", &devfs_debug_enable); +SYSCTL_INT(_vfs_devfs, OID_AUTO, debug, CTLFLAG_RW, &devfs_debug_enable, 0, "Enable DevFS debugging"); + +SYSINIT(vfs_devfs_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, devfs_init, NULL); +SYSUNINIT(vfs_devfs_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, devfs_uninit, NULL); diff --git a/sys/vfs/devfs/devfs_helper.c b/sys/vfs/devfs/devfs_helper.c new file mode 100644 index 0000000000..2d635c0065 --- /dev/null +++ b/sys/vfs/devfs/devfs_helper.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DECLARE(M_DEVFS); + +static struct devfs_unit_hash *devfs_clone_hash_get(int, cdev_t); +static void devfs_clone_hash_put(struct devfs_unit_hash *); +static int devfs_clone_hash_add(struct devfs_unit_hash **, struct devfs_unit_hash *); +static struct devfs_unit_hash *devfs_clone_hash_del(struct devfs_unit_hash **, int); + +/* + * DEVFS clone hash functions + */ + +static struct devfs_unit_hash * +devfs_clone_hash_get(int unit_no, cdev_t dev) +{ + struct devfs_unit_hash *hash = (struct devfs_unit_hash *)kmalloc(sizeof(struct devfs_unit_hash), M_DEVFS, M_WAITOK); + hash->next = NULL; + hash->unit_no = unit_no; + hash->dev = dev; + + return hash; +} + + +static void +devfs_clone_hash_put(struct devfs_unit_hash *hash) +{ + kfree(hash, M_DEVFS); +} + + +static int +devfs_clone_hash_add(struct devfs_unit_hash **devfs_hash_array, struct devfs_unit_hash *hash) +{ + struct devfs_unit_hash **hashp; + hashp = &devfs_hash_array[hash->unit_no & + DEVFS_UNIT_HMASK]; + while (*hashp) { + if ((*hashp)->unit_no == + hash->unit_no) + return(EEXIST); + hashp = &(*hashp)->next; + } + hash->next = NULL; + *hashp = hash; + return (0); +} + + +static struct devfs_unit_hash * +devfs_clone_hash_del(struct devfs_unit_hash **devfs_hash_array, int unit_no) +{ + struct devfs_unit_hash **hashp; + struct devfs_unit_hash *hash; + hashp = &devfs_hash_array[unit_no & + DEVFS_UNIT_HMASK]; + hash = *hashp; + while ((*hashp)->unit_no != unit_no) { + KKASSERT(*hashp != NULL); + hashp = &(*hashp)->next; + hash = *hashp; + } + *hashp = hash->next; + + return hash; +} + +/* + * DEVFS clone bitmap functions + */ +void +devfs_clone_bitmap_init(struct devfs_bitmap *bitmap) +{ + bitmap->bitmap = (unsigned long *)kmalloc(DEVFS_BITMAP_INITIAL_SIZE*sizeof(unsigned long), M_DEVFS, M_WAITOK); + bitmap->chunks = DEVFS_BITMAP_INITIAL_SIZE; + memset(bitmap->bitmap, ULONG_MAX, DEVFS_BITMAP_INITIAL_SIZE*sizeof(unsigned long)); +} + + 
+void +devfs_clone_bitmap_uninit(struct devfs_bitmap *bitmap) +{ + kfree(bitmap, M_DEVFS); +} + + +void +devfs_clone_bitmap_resize(struct devfs_bitmap *bitmap, int newchunks) +{ + int oldchunks = bitmap->chunks; + bitmap->chunks = newchunks+2; + bitmap->bitmap = (unsigned long *)krealloc(bitmap->bitmap, sizeof(unsigned long)*bitmap->chunks, M_DEVFS, M_WAITOK); + + devfs_debug(DEVFS_DEBUG_DEBUG, "%d vs %d (oldchunks=%d)\n", bitmap->bitmap, bitmap->bitmap + oldchunks, oldchunks); + memset(bitmap->bitmap + oldchunks, ULONG_MAX, sizeof(unsigned long)*(bitmap->chunks - oldchunks)); +} + + +int +devfs_clone_bitmap_fff(struct devfs_bitmap *bitmap) +{ + unsigned long curbitmap; + int bit, i; + int chunks = bitmap->chunks; + + for (i = 0; i < chunks+1; i++) { + if (i == chunks) + devfs_clone_bitmap_resize(bitmap, i); + curbitmap = bitmap->bitmap[i]; + + if (curbitmap > 0) { + curbitmap &= (~curbitmap)+1; + for (bit = 1; curbitmap != 1; bit++) + curbitmap = (unsigned long)curbitmap >> 1; + + return bit-1 + (i<<3) * sizeof(unsigned long); + } + } + + /* Should never happen as we dynamically resize as needed */ + return -1; +} + + +int +devfs_clone_bitmap_chk(struct devfs_bitmap *bitmap, int unit) +{ + int chunk = unit / (sizeof(unsigned long)<<3); + unit -= chunk<<3 * sizeof(unsigned long); + + if (chunk >= bitmap->chunks) + return 1; + + return !((bitmap->bitmap[chunk]) & (1<<(unit))); +} + + +void +devfs_clone_bitmap_set(struct devfs_bitmap *bitmap, int unit) +{ + int chunk = unit / (sizeof(unsigned long)<<3); + unit -= chunk<<3 * sizeof(unsigned long); + + if (chunk >= bitmap->chunks) { + devfs_clone_bitmap_resize(bitmap, chunk); + } + + bitmap->bitmap[chunk] ^= (1<= bitmap->chunks) + return; + + bitmap->bitmap[chunk] |= (1< 0) && (unit > limit)) + return -1; + + devfs_clone_bitmap_set(bitmap, unit); + + return unit; +} + +/* + * DEVFS clone helper functions + */ + +void +devfs_clone_helper_init(struct devfs_clone_helper *helper) +{ + 
devfs_clone_bitmap_init(&helper->DEVFS_CLONE_BITMAP(generic)); + memset(&helper->DEVFS_CLONE_HASHLIST(generic), 0, DEVFS_UNIT_HSIZE*sizeof(void *)); +} + + +void +devfs_clone_helper_uninit(struct devfs_clone_helper *helper) +{ + devfs_clone_bitmap_uninit(&helper->DEVFS_CLONE_BITMAP(generic)); + //XXX: free all elements in helper->DEVFS_HASHLIST(generic) +} + + +int +devfs_clone_helper_insert(struct devfs_clone_helper *helper, cdev_t dev) +{ + struct devfs_unit_hash *hash; + int error = 0; + int unit_no; + +try_again: + unit_no = devfs_clone_bitmap_fff(&helper->DEVFS_CLONE_BITMAP(generic)); + + devfs_clone_bitmap_set(&helper->DEVFS_CLONE_BITMAP(generic), unit_no); + hash = devfs_clone_hash_get(unit_no, dev); + + error = devfs_clone_hash_add(helper->DEVFS_CLONE_HASHLIST(generic), hash); + KKASSERT(!error); + + if (error) + goto try_again; + + dev->si_uminor = unit_no; + return unit_no; +} + + +int +devfs_clone_helper_remove(struct devfs_clone_helper *helper, int unit_no) +{ + struct devfs_unit_hash *hash; + hash = devfs_clone_hash_del(helper->DEVFS_CLONE_HASHLIST(generic), unit_no); + devfs_clone_bitmap_rst(&helper->DEVFS_CLONE_BITMAP(generic), unit_no); + kfree(hash, M_DEVFS); + + return 0; +} diff --git a/sys/vfs/devfs/devfs_rules.c b/sys/vfs/devfs/devfs_rules.c new file mode 100644 index 0000000000..5be0dabd12 --- /dev/null +++ b/sys/vfs/devfs/devfs_rules.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DECLARE(M_DEVFS); + + +static int WildCmp(const char *w, const char *s); +static int WildCaseCmp(const char *w, const char *s); +static int wildCmp(const char **mary, int d, const char *w, const char *s); +static int wildCaseCmp(const char **mary, int d, const char *w, const char *s); + +static d_open_t devfs_dev_open; +static d_close_t devfs_dev_close; +static d_ioctl_t devfs_dev_ioctl; + +static struct devfs_rule *devfs_rule_alloc(struct devfs_rule *); +static void devfs_rule_free(struct devfs_rule *); +static void devfs_rule_insert(struct devfs_rule *); +static void devfs_rule_remove(struct devfs_rule *); +static void devfs_rule_clear(struct devfs_rule *); + +static int devfs_rule_checkname(struct devfs_rule *, struct devfs_node *); + +static struct objcache *devfs_rule_cache; +static struct lock devfs_rule_lock; + +static struct objcache_malloc_args devfs_rule_malloc_args = { + sizeof(struct devfs_rule), M_DEVFS }; + +static cdev_t devfs_dev; +static struct devfs_rule_head devfs_rule_list = TAILQ_HEAD_INITIALIZER(devfs_rule_list); + +static struct dev_ops devfs_dev_ops = { + { "devfs", 0, 0 }, + .d_open = devfs_dev_open, + .d_close = devfs_dev_close, + .d_ioctl = devfs_dev_ioctl +}; + + +static struct devfs_rule * +devfs_rule_alloc(struct devfs_rule *templ) +{ + struct devfs_rule *rule = objcache_get(devfs_rule_cache, M_WAITOK); + + memcpy(rule, templ, sizeof(struct devfs_rule)); + return rule; +} + + +static void +devfs_rule_free(struct devfs_rule *rule) +{ + objcache_put(devfs_rule_cache, rule); +} + + +static void +devfs_rule_insert(struct devfs_rule *templ) +{ + struct devfs_rule *rule = devfs_rule_alloc(templ); + lockmgr(&devfs_rule_lock, LK_EXCLUSIVE); + rule->mntpointlen = strlen(rule->mntpoint); + TAILQ_INSERT_TAIL(&devfs_rule_list, rule, link); + lockmgr(&devfs_rule_lock, LK_RELEASE); +} + + +static void +devfs_rule_remove(struct 
devfs_rule *rule) +{ + TAILQ_REMOVE(&devfs_rule_list, rule, link); + devfs_rule_free(rule); +} + + +static void +devfs_rule_clear(struct devfs_rule *rule) +{ + struct devfs_rule *rule1, *rule2; + rule->mntpointlen = strlen(rule->mntpoint); + + lockmgr(&devfs_rule_lock, LK_EXCLUSIVE); + TAILQ_FOREACH_MUTABLE(rule1, &devfs_rule_list, link, rule2) { + if ((rule->mntpoint[0] == '*') || + ( (rule->mntpointlen == rule1->mntpointlen) && + (!memcmp(rule->mntpoint, rule1->mntpoint, rule->mntpointlen)) )) { + devfs_rule_remove(rule1); + } + } + lockmgr(&devfs_rule_lock, LK_RELEASE); +} + + +int +devfs_rule_reset_node(struct devfs_node *node) +{ + node->flags &= ~DEVFS_HIDDEN; + + if ((node->node_type == Pdev) && (node->d_dev)) { + node->uid = node->d_dev->si_uid; + node->gid = node->d_dev->si_gid; + node->mode = node->d_dev->si_perms; + } + + return 0; +} + + +int +devfs_rule_check_apply(struct devfs_node *node) +{ + struct devfs_rule *rule; + struct mount *mp = node->mp; + int applies = 0; + + lockmgr(&devfs_rule_lock, LK_EXCLUSIVE); + TAILQ_FOREACH(rule, &devfs_rule_list, link) { + + /* + * Skip this rule if it is only intended for jailed mount points + * and the current mount point isn't jailed + */ + if ((rule->rule_type & DEVFS_RULE_JAIL) && + (!(DEVFS_MNTDATA(mp)->jailed)) ) + continue; + + /* + * Skip this rule if the mount point specified in the rule doesn't + * match the mount point of the node + */ + if ((rule->mntpoint[0] != '*') && + ((rule->mntpointlen != DEVFS_MNTDATA(mp)->mntonnamelen) || + (memcmp(rule->mntpoint, mp->mnt_stat.f_mntonname, rule->mntpointlen)))) + continue; + + /* + * Skip this rule if this is a by-type rule and the device flags + * don't match the specified device type in the rule + */ + if ((rule->rule_type & DEVFS_RULE_TYPE) && + ( (rule->dev_type == 0) || (!dev_is_good(node->d_dev)) || + (!dev_dflags(node->d_dev) & rule->dev_type)) ) + continue; + + /* + * Skip this rule if this is a by-name rule and the node name + * doesn't match the 
wildcard string in the rule + */ + if ((rule->rule_type & DEVFS_RULE_NAME) && + (!devfs_rule_checkname(rule, node)) ) + continue; + + + if (rule->rule_type & DEVFS_RULE_HIDE) { + /* + * If we should hide the device, we just apply the relevant + * hide flag to the node and let devfs do the rest in the + * vnops + */ + node->flags |= DEVFS_HIDDEN; + applies = 1; + } else if (rule->rule_type & DEVFS_RULE_SHOW) { + /* + * Show rule just means that the node should not be hidden, so + * what we do is clear the hide flag from the node. + */ + node->flags &= ~DEVFS_HIDDEN; + applies = 1; + } else if ((rule->rule_type & DEVFS_RULE_LINK) && (node->node_type != Plink)) { + /* + * This is a LINK rule, so we tell devfs to create + * a link with the correct name to this node. + */ + devfs_alias_create(rule->linkname, node); + applies = 1; + } else { + /* + * This is a normal ownership/permission rule. We + * just apply the permissions and ownership and + * we are done. + */ + node->mode = rule->mode; + node->uid = rule->uid; + node->gid = rule->gid; + applies = 1; + } + } + lockmgr(&devfs_rule_lock, LK_RELEASE); + return applies; +} + + +static int +devfs_rule_checkname(struct devfs_rule *rule, struct devfs_node *node) +{ + struct devfs_node *parent = DEVFS_MNTDATA(node->mp)->root_node; + char *path = NULL; + char *name, name_buf[PATH_MAX]; + int no_match = 0; + + devfs_resolve_name_path(rule->name, name_buf, &path, &name); + parent = devfs_resolve_or_create_path(parent, path, 0); + + if (parent == NULL) + return 0; /* no match */ + + /* Check if node is a child of the parent we found */ + if (node->parent != parent) + return 0; /* no match */ + + if (rule->rule_type & DEVFS_RULE_LINK) + no_match = memcmp(name, node->d_dir.d_name, strlen(name)); + else + no_match = WildCaseCmp(name, node->d_dir.d_name); + + return !no_match; +} + + +static int +devfs_dev_open(struct dev_open_args *ap) +{ + /* + * Only allow read-write access. 
+ */ + if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0)) + return(EPERM); + + /* + * We don't allow nonblocking access. + */ + if ((ap->a_oflags & O_NONBLOCK) != 0) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_dev: can't do nonblocking access\n"); + return(ENODEV); + } + + return 0; +} + + +static int +devfs_dev_close(struct dev_close_args *ap) +{ + return 0; +} + + +static int +devfs_dev_ioctl(struct dev_ioctl_args *ap) +{ + int error; + struct devfs_rule *rule; + + error = 0; + rule = (struct devfs_rule *)ap->a_data; + + switch(ap->a_cmd) { + case DEVFS_RULE_ADD: + devfs_rule_insert(rule); + break; + + case DEVFS_RULE_APPLY: + devfs_apply_rules(rule->mntpoint); + break; + + case DEVFS_RULE_CLEAR: + devfs_rule_clear(rule); + break; + + case DEVFS_RULE_RESET: + devfs_reset_rules(rule->mntpoint); + break; + + default: + error = ENOTTY; /* Inappropriate ioctl for device */ + break; + } + + return(error); +} + + +static void +devfs_dev_init(void *unused) +{ + lockinit(&devfs_rule_lock, "devfs_rule lock", 0, 0); + + devfs_rule_cache = objcache_create("devfs-rule-cache", 0, 0, + NULL, NULL, NULL, + objcache_malloc_alloc, + objcache_malloc_free, + &devfs_rule_malloc_args ); + + devfs_dev = make_dev(&devfs_dev_ops, + 0, + UID_ROOT, + GID_WHEEL, + 0600, + "devfs"); +} + + +static void +devfs_dev_uninit(void *unused) +{ + //XXX: destroy all rules first + destroy_dev(devfs_dev); + objcache_destroy(devfs_rule_cache); +} + + +SYSINIT(devfsdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,devfs_dev_init,NULL) +SYSUNINIT(devfsdev, SI_SUB_DRIVERS,SI_ORDER_FIRST,devfs_dev_uninit, NULL); + + + +static int +WildCmp(const char *w, const char *s) +{ + int i; + int c; + int slen = strlen(s); + const char **mary; + + for (i = c = 0; w[i]; ++i) { + if (w[i] == '*') + ++c; + } + mary = kmalloc(sizeof(char *) * (c + 1), M_DEVFS, M_WAITOK); + for (i = 0; i < c; ++i) + mary[i] = s + slen; + i = wildCmp(mary, 0, w, s); + kfree(mary, M_DEVFS); + return(i); +} + +static int +WildCaseCmp(const 
char *w, const char *s) +{ + int i; + int c; + int slen = strlen(s); + const char **mary; + + for (i = c = 0; w[i]; ++i) { + if (w[i] == '*') + ++c; + } + mary = kmalloc(sizeof(char *) * (c + 1), M_DEVFS, M_WAITOK); + for (i = 0; i < c; ++i) + mary[i] = s + slen; + i = wildCaseCmp(mary, 0, w, s); + kfree(mary, M_DEVFS); + return(i); +} + +/* + * WildCmp() - compare wild string to sane string + * + * Returns 0 on success, -1 on failure. + */ +static int +wildCmp(const char **mary, int d, const char *w, const char *s) +{ + int i; + + /* + * skip fixed portion + */ + for (;;) { + switch(*w) { + case '*': + /* + * optimize terminator + */ + if (w[1] == 0) + return(0); + if (w[1] != '?' && w[1] != '*') { + /* + * optimize * followed by non-wild + */ + for (i = 0; s + i < mary[d]; ++i) { + if (s[i] == w[1] && wildCmp(mary, d + 1, w + 1, s + i) == 0) + return(0); + } + } else { + /* + * less-optimal + */ + for (i = 0; s + i < mary[d]; ++i) { + if (wildCmp(mary, d + 1, w + 1, s + i) == 0) + return(0); + } + } + mary[d] = s; + return(-1); + case '?': + if (*s == 0) + return(-1); + ++w; + ++s; + break; + default: + if (*w != *s) + return(-1); + if (*w == 0) /* terminator */ + return(0); + ++w; + ++s; + break; + } + } + /* not reached */ + return(-1); +} + + +/* + * WildCaseCmp() - compare wild string to sane string, case insensitive + * + * Returns 0 on success, -1 on failure. + */ +static int +wildCaseCmp(const char **mary, int d, const char *w, const char *s) +{ + int i; + + /* + * skip fixed portion + */ + for (;;) { + switch(*w) { + case '*': + /* + * optimize terminator + */ + if (w[1] == 0) + return(0); + if (w[1] != '?' 
&& w[1] != '*') { + /* + * optimize * followed by non-wild + */ + for (i = 0; s + i < mary[d]; ++i) { + if (s[i] == w[1] && wildCaseCmp(mary, d + 1, w + 1, s + i) == 0) + return(0); + } + } else { + /* + * less-optimal + */ + for (i = 0; s + i < mary[d]; ++i) { + if (wildCaseCmp(mary, d + 1, w + 1, s + i) == 0) + return(0); + } + } + mary[d] = s; + return(-1); + case '?': + if (*s == 0) + return(-1); + ++w; + ++s; + break; + default: + if (*w != *s) { +#define tolower(x) ((x >= 'A' && x <= 'Z')?(x+('a'-'A')):(x)) + if (tolower(*w) != tolower(*s)) + return(-1); + } + if (*w == 0) /* terminator */ + return(0); + ++w; + ++s; + break; + } + } + /* not reached */ + return(-1); +} diff --git a/sys/vfs/devfs/devfs_rules.h b/sys/vfs/devfs/devfs_rules.h new file mode 100644 index 0000000000..6f229e5b4f --- /dev/null +++ b/sys/vfs/devfs/devfs_rules.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef _VFS_DEVFS_RULES_H_ +#define _VFS_DEVFS_RULES_H_ + +#ifndef _SYS_QUEUE_H_ +#include +#endif +#ifndef _SYS_TYPES_H_ +#include +#endif + +#define DEVFS_MAX_POLICY_LENGTH 32 + +struct devfs_rule { + u_long rule_type; + char mntpoint[255 + 1]; + u_short mntpointlen; + + char name[DEVFS_MAX_POLICY_LENGTH]; + char linkname[SPECNAMELEN+1]; + + u_long dev_type; /* Type of device to which the rule applies */ + u_short mode; /* files access mode and type */ + uid_t uid; /* owner user id */ + gid_t gid; /* owner group id */ + + struct devfs_rule *group; + + TAILQ_ENTRY(devfs_rule) link; +}; + +#define DEVFS_RULE_NAME 0x01 +#define DEVFS_RULE_TYPE 0x02 +#define DEVFS_RULE_LINK 0x04 +#define DEVFS_RULE_JAIL 0x08 +#define DEVFS_RULE_HIDE 0x10 +#define DEVFS_RULE_SHOW 0x20 + +#define DEVFS_RULE_ADD _IOWR('d', 221, struct devfs_rule) +#define DEVFS_RULE_APPLY _IOWR('d', 222, struct devfs_rule) +#define DEVFS_RULE_CLEAR _IOWR('d', 223, struct devfs_rule) +#define DEVFS_RULE_RESET _IOWR('d', 224, struct devfs_rule) + +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + +#ifndef _VFS_DEVFS_H_ +#include +#endif + +TAILQ_HEAD(devfs_rule_head, devfs_rule); + + +int 
devfs_rule_check_apply(struct devfs_node *); +int devfs_rule_check_reverse(struct devfs_node *); +int devfs_rule_reset_node(struct devfs_node *); +#endif /* _KERNEL */ +#endif /* _VFS_DEVFS_RULES_H_ */ diff --git a/sys/vfs/devfs/devfs_vfsops.c b/sys/vfs/devfs/devfs_vfsops.c new file mode 100644 index 0000000000..1141a06057 --- /dev/null +++ b/sys/vfs/devfs/devfs_vfsops.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +MALLOC_DECLARE(M_DEVFS); + +extern struct vop_ops devfs_vnode_norm_vops; +extern struct vop_ops devfs_vnode_dev_vops; +extern struct lock devfs_lock; + +static int devfs_mount (struct mount *mp, char *path, caddr_t data, + struct ucred *cred); +static int devfs_statfs (struct mount *mp, struct statfs *sbp, + struct ucred *cred); +static int devfs_unmount (struct mount *mp, int mntflags); +int devfs_root(struct mount *mp, struct vnode **vpp); + +/* + * VFS Operations. + * + * mount system call + */ +/* ARGSUSED */ +static int +devfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) +{ + size_t size; + + devfs_debug(DEVFS_DEBUG_DEBUG, "(vfsops) devfs_mount() called!\n"); + + if (mp->mnt_flag & MNT_UPDATE) + return (EOPNOTSUPP); + + + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_NOSTKMNT; + mp->mnt_data = 0; + vfs_getnewfsid(mp); + + size = sizeof("devfs") - 1; + bcopy("devfs", mp->mnt_stat.f_mntfromname, size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + devfs_statfs(mp, &mp->mnt_stat, cred); + + //XXX: save other mount info passed from userland or so. 
+ mp->mnt_data = kmalloc(sizeof(struct devfs_mnt_data), M_DEVFS, M_WAITOK); + + lockmgr(&devfs_lock, LK_EXCLUSIVE); + DEVFS_MNTDATA(mp)->jailed = jailed(cred); + DEVFS_MNTDATA(mp)->mntonnamelen = strlen(mp->mnt_stat.f_mntonname); + DEVFS_MNTDATA(mp)->leak_count = 0; + DEVFS_MNTDATA(mp)->root_node = devfs_allocp(Proot, "", NULL, mp, NULL); + KKASSERT(DEVFS_MNTDATA(mp)->root_node); + TAILQ_INIT(DEVFS_ORPHANLIST(mp)); + lockmgr(&devfs_lock, LK_RELEASE); + + vfs_add_vnodeops(mp, &devfs_vnode_norm_vops, &mp->mnt_vn_norm_ops); + vfs_add_vnodeops(mp, &devfs_vnode_dev_vops, &mp->mnt_vn_spec_ops); + + devfs_debug(DEVFS_DEBUG_DEBUG, "calling devfs_mount_add\n"); + devfs_mount_add(DEVFS_MNTDATA(mp)); + + return (0); +} + +/* + * unmount system call + */ +static int +devfs_unmount(struct mount *mp, int mntflags) +{ + int error = 0; + int flags = 0; + + devfs_debug(DEVFS_DEBUG_DEBUG, "(vfsops) devfs_unmount() called!\n"); + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + error = vflush(mp, 0, flags); + + if (error) + return (error); + + devfs_debug(DEVFS_DEBUG_DEBUG, "There were %d devfs_node orphans left\n", devfs_tracer_orphan_count(mp, 1)); + devfs_debug(DEVFS_DEBUG_DEBUG, "There are %d devfs_node orphans left\n", devfs_tracer_orphan_count(mp, 0)); + devfs_mount_del(DEVFS_MNTDATA(mp)); + kfree(mp->mnt_data, M_DEVFS); + + return (0); +} + +/* + * Sets *vpp to the root procfs vnode, referenced and exclusively locked + */ +int +devfs_root(struct mount *mp, struct vnode **vpp) +{ + int ret; + devfs_debug(DEVFS_DEBUG_DEBUG, "(vfsops) devfs_root() called!\n"); + lockmgr(&devfs_lock, LK_EXCLUSIVE); + ret = devfs_allocv(vpp, DEVFS_MNTDATA(mp)->root_node); + lockmgr(&devfs_lock, LK_RELEASE); + return ret; +} + +/* + * Get file system statistics. 
+ */ +static int +devfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) +{ + devfs_debug(DEVFS_DEBUG_DEBUG, "(vfsops) devfs_stat() called!\n"); + sbp->f_bsize = DEV_BSIZE; + sbp->f_iosize = DEV_BSIZE; + sbp->f_blocks = 2; /* avoid divide by zero in some df's */ + sbp->f_bfree = 0; + sbp->f_bavail = 0; + sbp->f_files = 0; + sbp->f_ffree = 0; + + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + + return (0); +} + +static struct vfsops devfs_vfsops = { + .vfs_mount = devfs_mount, + .vfs_unmount = devfs_unmount, + .vfs_root = devfs_root, + .vfs_statfs = devfs_statfs, + .vfs_sync = vfs_stdsync +}; + +VFS_SET(devfs_vfsops, devfs, VFCF_SYNTHETIC); +MODULE_VERSION(devfs, 1); diff --git a/sys/vfs/devfs/devfs_vnops.c b/sys/vfs/devfs/devfs_vnops.c new file mode 100644 index 0000000000..77ca18ad41 --- /dev/null +++ b/sys/vfs/devfs/devfs_vnops.c @@ -0,0 +1,2047 @@ +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Alex Hornung + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +MALLOC_DECLARE(M_DEVFS); +#define DEVFS_BADOP (void *)devfs_badop + +static int devfs_badop(struct vop_generic_args *); +static int devfs_access(struct vop_access_args *); +static int devfs_inactive(struct vop_inactive_args *); +static int devfs_reclaim(struct vop_reclaim_args *); +static int devfs_readdir(struct vop_readdir_args *); +static int devfs_getattr(struct vop_getattr_args *); +static int devfs_setattr(struct vop_setattr_args *); +static int devfs_readlink(struct vop_readlink_args *); +static int devfs_print(struct vop_print_args *); + +static int devfs_nresolve(struct vop_nresolve_args *); +static int devfs_nlookupdotdot(struct vop_nlookupdotdot_args *); +static int devfs_nsymlink(struct vop_nsymlink_args *); +static int devfs_nremove(struct vop_nremove_args *); + +static int devfs_spec_open(struct vop_open_args *); +static int devfs_spec_close(struct 
vop_close_args *); +static int devfs_spec_fsync(struct vop_fsync_args *); + +static int devfs_spec_read(struct vop_read_args *); +static int devfs_spec_write(struct vop_write_args *); +static int devfs_spec_ioctl(struct vop_ioctl_args *); +static int devfs_spec_poll(struct vop_poll_args *); +static int devfs_spec_kqfilter(struct vop_kqfilter_args *); +static int devfs_spec_strategy(struct vop_strategy_args *); +static void devfs_spec_strategy_done(struct bio *); +static int devfs_spec_freeblks(struct vop_freeblks_args *); +static int devfs_spec_bmap(struct vop_bmap_args *); +static int devfs_spec_advlock(struct vop_advlock_args *); +static void devfs_spec_getpages_iodone(struct bio *); +static int devfs_spec_getpages(struct vop_getpages_args *); + + +static int devfs_specf_close(struct file *); +static int devfs_specf_read(struct file *, struct uio *, struct ucred *, int); +static int devfs_specf_write(struct file *, struct uio *, struct ucred *, int); +static int devfs_specf_stat(struct file *, struct stat *, struct ucred *); +static int devfs_specf_kqfilter(struct file *, struct knote *); +static int devfs_specf_poll(struct file *, int, struct ucred *); +static int devfs_specf_ioctl(struct file *, u_long, caddr_t, struct ucred *); + + +static __inline int sequential_heuristic(struct uio *, struct file *); +extern struct lock devfs_lock; + +/* + * devfs vnode operations for regular files + */ +struct vop_ops devfs_vnode_norm_vops = { + .vop_default = vop_defaultop, + .vop_access = devfs_access, + .vop_advlock = DEVFS_BADOP, + .vop_bmap = DEVFS_BADOP, + .vop_close = vop_stdclose, + .vop_getattr = devfs_getattr, + .vop_inactive = devfs_inactive, + .vop_ncreate = DEVFS_BADOP, + .vop_nresolve = devfs_nresolve, + .vop_nlookupdotdot = devfs_nlookupdotdot, + .vop_nlink = DEVFS_BADOP, + .vop_nmkdir = DEVFS_BADOP, + .vop_nmknod = DEVFS_BADOP, + .vop_nremove = devfs_nremove, + .vop_nrename = DEVFS_BADOP, + .vop_nrmdir = DEVFS_BADOP, + .vop_nsymlink = devfs_nsymlink, + 
.vop_open = vop_stdopen, + .vop_pathconf = vop_stdpathconf, + .vop_print = devfs_print, + .vop_read = DEVFS_BADOP, + .vop_readdir = devfs_readdir, + .vop_readlink = devfs_readlink, + .vop_reclaim = devfs_reclaim, + .vop_setattr = devfs_setattr, + .vop_write = DEVFS_BADOP, + .vop_ioctl = DEVFS_BADOP +}; + +/* + * devfs vnode operations for character devices + */ +struct vop_ops devfs_vnode_dev_vops = { + .vop_default = vop_defaultop, + .vop_access = devfs_access, + .vop_advlock = devfs_spec_advlock, + .vop_bmap = devfs_spec_bmap, + .vop_close = devfs_spec_close, + .vop_freeblks = devfs_spec_freeblks, + .vop_fsync = devfs_spec_fsync, + .vop_getattr = devfs_getattr, + .vop_getpages = devfs_spec_getpages, + .vop_inactive = devfs_inactive, + .vop_open = devfs_spec_open, + .vop_pathconf = vop_stdpathconf, + .vop_print = devfs_print, + .vop_poll = devfs_spec_poll, + .vop_kqfilter = devfs_spec_kqfilter, + .vop_read = devfs_spec_read, + .vop_readdir = DEVFS_BADOP, + .vop_readlink = DEVFS_BADOP, + .vop_reclaim = devfs_reclaim, + .vop_setattr = devfs_setattr, + .vop_strategy = devfs_spec_strategy, + .vop_write = devfs_spec_write, + .vop_ioctl = devfs_spec_ioctl +}; + +struct vop_ops *devfs_vnode_dev_vops_p = &devfs_vnode_dev_vops; + +struct fileops devfs_dev_fileops = { + .fo_read = devfs_specf_read, + .fo_write = devfs_specf_write, + .fo_ioctl = devfs_specf_ioctl, + .fo_poll = devfs_specf_poll, + .fo_kqfilter = devfs_specf_kqfilter, + .fo_stat = devfs_specf_stat, + .fo_close = devfs_specf_close, + .fo_shutdown = nofo_shutdown +}; + + +/* + * generic entry point for unsupported operations + */ +static int +devfs_badop(struct vop_generic_args *ap) +{ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: specified vnode operation is not implemented (yet)\n"); + return (EIO); +} + + +static int +devfs_access(struct vop_access_args *ap) +{ + struct devfs_node *node = DEVFS_NODE(ap->a_vp); + int error = 0; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access() called!\n"); + + error = 
vop_helper_access(ap, node->uid, node->gid, + node->mode, node->flags); + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access ruled over %s: %d\n", "UNKNOWN", error); + + return error; + //XXX: consider possible special cases? terminal, ...? +} + + +static int +devfs_inactive(struct vop_inactive_args *ap) +{ + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_inactive() called!\n"); + + /* Check if the devfs_node is not linked anymore into the topology. + * If this is the case, we suggest that the vnode is recycled. */ + if (DEVFS_NODE(ap->a_vp)) { + if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) { + vrecycle(ap->a_vp); + } + } + + return 0; +} + + +static int +devfs_reclaim(struct vop_reclaim_args *ap) +{ + int locked = 0; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_reclaim() called!\n"); + + /* Check if it is locked already. if not, we acquire the devfs lock */ + if (!(lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE) { + lockmgr(&devfs_lock, LK_EXCLUSIVE); + locked = 1; + } + + /* Check if the devfs_node is not linked anymore into the topology. + * If this is the case, we get rid of the devfs_node. 
*/ + if (DEVFS_NODE(ap->a_vp)) { + if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) { + devfs_freep(DEVFS_NODE(ap->a_vp)); + //devfs_tracer_del_orphan(DEVFS_NODE(ap->a_vp)); + } + + /* unlink vnode <--> devfs_node */ + DEVFS_NODE(ap->a_vp)->v_node = NULL; + } + + /* If we acquired the lock, we also get rid of it */ + if (locked) + lockmgr(&devfs_lock, LK_RELEASE); + + ap->a_vp->v_data = NULL; + /* avoid a panic on release because of not adding it with v_associate_rdev */ + ap->a_vp->v_rdev = NULL; + + return 0; +} + + +static int +devfs_readdir(struct vop_readdir_args *ap) +{ + struct devfs_node *node; + int error2 = 0, r, error = 0; + + int cookie_index; + int ncookies; + off_t *cookies; + off_t saveoff; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readdir() called!\n"); + + if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) + return (EINVAL); + if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0) + return (error); + + saveoff = ap->a_uio->uio_offset; + + if (ap->a_ncookies) { + ncookies = ap->a_uio->uio_resid / 16 + 1; /* Why / 16 ?? 
*/ + if (ncookies > 256) + ncookies = 256; + cookies = kmalloc(256 * sizeof(off_t), M_TEMP, M_WAITOK); + cookie_index = 0; + } else { + ncookies = -1; + cookies = NULL; + cookie_index = 0; + } + + nanotime(&DEVFS_NODE(ap->a_vp)->atime); + + if (saveoff == 0) { + r = vop_write_dirent(&error, ap->a_uio, DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 1, "."); + if (r) + goto done; + if (cookies) + cookies[cookie_index] = saveoff; + saveoff++; + cookie_index++; + if (cookie_index == ncookies) + goto done; + } + + if (saveoff == 1) { + if (DEVFS_NODE(ap->a_vp)->parent) { + r = vop_write_dirent(&error, ap->a_uio, + DEVFS_NODE(ap->a_vp)->d_dir.d_ino, + DT_DIR, 2, ".."); + } else { + r = vop_write_dirent(&error, ap->a_uio, + DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 2, ".."); + } + if (r) + goto done; + if (cookies) + cookies[cookie_index] = saveoff; + saveoff++; + cookie_index++; + if (cookie_index == ncookies) + goto done; + } + + TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_vp)), link) { + if ((node->flags & DEVFS_HIDDEN) || (node->flags & DEVFS_INVISIBLE)) + continue; + + if (node->cookie < saveoff) + continue; +/* + if (skip > 0) { + skip--; + continue; + } +*/ + saveoff = node->cookie; + + error2 = vop_write_dirent(&error, ap->a_uio, + node->d_dir.d_ino, node->d_dir.d_type, + node->d_dir.d_namlen, node->d_dir.d_name); + + if(error2) + break; + + saveoff++; + + if (cookies) + cookies[cookie_index] = node->cookie; + ++cookie_index; + if (cookie_index == ncookies) + break; + + //count++; + } + +done: + vn_unlock(ap->a_vp); + + ap->a_uio->uio_offset = saveoff; + if (error && cookie_index == 0) { + if (cookies) { + kfree(cookies, M_TEMP); + *ap->a_ncookies = 0; + *ap->a_cookies = NULL; + } + } else { + if (cookies) { + *ap->a_ncookies = cookie_index; + *ap->a_cookies = cookies; + } + } + return (error); +} + + +static int +devfs_nresolve(struct vop_nresolve_args *ap) +{ + struct devfs_node *node, *found = NULL; + struct namecache *ncp; + struct vnode *vp = NULL; + 
//void *ident; + int error = 0; + int len; + int hidden = 0; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve() called!\n"); + + ncp = ap->a_nch->ncp; + len = ncp->nc_nlen; + + lockmgr(&devfs_lock, LK_EXCLUSIVE); + + if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) && + (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: ap->a_dvp is not a dir!!!\n"); + cache_setvp(ap->a_nch, NULL); + goto out; + } + +search: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -search- \n"); + TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link) { + if (len == node->d_dir.d_namlen) { + if (!memcmp(ncp->nc_name, node->d_dir.d_name, len)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: found: %s\n", ncp->nc_name); + found = node; + break; + } + } + } + + if (found) { + if ((found->node_type == Plink) && (found->link_target)) + found = found->link_target; + + if (!(found->flags & DEVFS_HIDDEN)) + devfs_allocv(/*ap->a_dvp->v_mount, */ &vp, found); + else + hidden = 1; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -2- \n"); + } + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -3- %c%c%c\n", ncp->nc_name[0], ncp->nc_name[1], ncp->nc_name[2]); + if (vp == NULL) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve vp==NULL \n"); + /* XXX: len is int, devfs_clone expects size_t*, not int* */ + if ((!hidden) && (!devfs_clone(ncp->nc_name, &len, NULL, 0, ap->a_cred))) { + goto search; + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -4- \n"); + error = ENOENT; + cache_setvp(ap->a_nch, NULL); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -5- \n"); + goto out; + + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -6- \n"); + KKASSERT(vp); + vn_unlock(vp); + cache_setvp(ap->a_nch, vp); + vrele(vp); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -9- \n"); +out: + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -end:10- failed? 
%s \n", (error)?"FAILED!":"OK!"); + lockmgr(&devfs_lock, LK_RELEASE); + return error; +} + + +static int +devfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap) +{ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nlookupdotdot() called!\n"); + *ap->a_vpp = NULL; + + lockmgr(&devfs_lock, LK_EXCLUSIVE); + if (DEVFS_NODE(ap->a_dvp)->parent != NULL) { + devfs_allocv(/*ap->a_dvp->v_mount, */ap->a_vpp, DEVFS_NODE(ap->a_dvp)->parent); + vn_unlock(*ap->a_vpp); + } + lockmgr(&devfs_lock, LK_RELEASE); + + return ((*ap->a_vpp == NULL) ? ENOENT : 0); +} + + +static int +devfs_getattr(struct vop_getattr_args *ap) +{ + struct vattr *vap = ap->a_vap; + struct devfs_node *node = DEVFS_NODE(ap->a_vp); + int error = 0; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() called for %s!\n", DEVFS_NODE(ap->a_vp)->d_dir.d_name); + + /* start by zeroing out the attributes */ + VATTR_NULL(vap); + + /* next do all the common fields */ + vap->va_type = ap->a_vp->v_type; + vap->va_mode = node->mode; + vap->va_fileid = DEVFS_NODE(ap->a_vp)->d_dir.d_ino ; + vap->va_flags = 0; //what should this be? 
+ vap->va_blocksize = DEV_BSIZE; + vap->va_bytes = vap->va_size = sizeof(struct devfs_node); + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() check dev %s!\n", (DEVFS_NODE(ap->a_vp)->d_dev)?(DEVFS_NODE(ap->a_vp)->d_dev->si_name):"Not a device"); + + vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + + + vap->va_atime = node->atime; + vap->va_mtime = node->mtime; + vap->va_ctime = node->ctime; + + vap->va_nlink = 1; /* number of references to file */ + + vap->va_uid = node->uid; + vap->va_gid = node->gid; + + vap->va_rmajor = 0; + vap->va_rminor = 0; + + if ((DEVFS_NODE(ap->a_vp)->node_type == Pdev) && + (DEVFS_NODE(ap->a_vp)->d_dev)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "getattr: dev is: %p\n", DEVFS_NODE(ap->a_vp)->d_dev); + reference_dev(DEVFS_NODE(ap->a_vp)->d_dev); + vap->va_fsid = dev2udev(DEVFS_NODE(ap->a_vp)->d_dev); + vap->va_rminor = DEVFS_NODE(ap->a_vp)->d_dev->si_uminor; + release_dev(DEVFS_NODE(ap->a_vp)->d_dev); + } + + /* For a softlink the va_size is the length of the softlink */ + if (DEVFS_NODE(ap->a_vp)->symlink_name != 0) { + vap->va_size = DEVFS_NODE(ap->a_vp)->symlink_namelen; + } + nanotime(&node->atime); + return (error); //XXX: set error usefully +} + + +static int +devfs_setattr(struct vop_setattr_args *ap) +{ + struct devfs_node *node; + struct vattr *vap; + int error = 0; + + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr() called!\n"); + lockmgr(&devfs_lock, LK_EXCLUSIVE); + + vap = ap->a_vap; + node = DEVFS_NODE(ap->a_vp); + + if (vap->va_uid != (uid_t)VNOVAL) { + if ((ap->a_cred->cr_uid != node->uid) && + (!groupmember(node->gid, ap->a_cred))) { + error = priv_check(curthread, PRIV_VFS_CHOWN); + if (error) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -1-\n"); + goto out; + } + } + node->uid = vap->va_uid; + } + + if (vap->va_gid != (uid_t)VNOVAL) { + if ((ap->a_cred->cr_uid != node->uid) && + (!groupmember(node->gid, ap->a_cred))) { + error = priv_check(curthread, PRIV_VFS_CHOWN); + if (error) { + 
devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -2-\n"); + goto out; + } + } + node->gid = vap->va_gid; + } + + if (vap->va_mode != (mode_t)VNOVAL) { + if (ap->a_cred->cr_uid != node->uid) { + error = priv_check(curthread, PRIV_VFS_ADMIN); + if (error) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -3-\n"); + goto out; + } + } + node->mode = vap->va_mode; + } + +out: + nanotime(&node->mtime); + lockmgr(&devfs_lock, LK_RELEASE); + return error; +} + + +static int +devfs_readlink(struct vop_readlink_args *ap) +{ + struct devfs_node *node = DEVFS_NODE(ap->a_vp); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readlink() called!\n"); + + return (uiomove(node->symlink_name, node->symlink_namelen, ap->a_uio)); +} + + +static int +devfs_print(struct vop_print_args *ap) +{ + //struct devfs_node *node = DEVFS_NODE(ap->a_vp); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_print() called!\n"); + + //XXX: print some useful debugging about node. + return (0); +} + + +static int +devfs_nsymlink(struct vop_nsymlink_args *ap) +{ + size_t targetlen = strlen(ap->a_target); + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink() called!\n"); + + ap->a_vap->va_type = VLNK; + + if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) && + (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink: ap->a_dvp is not a dir!!!\n"); + goto out; + } + lockmgr(&devfs_lock, LK_EXCLUSIVE); + devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Plink, + ap->a_nch->ncp->nc_name, DEVFS_NODE(ap->a_dvp), NULL); + + if (*ap->a_vpp) { + DEVFS_NODE(*ap->a_vpp)->flags |= DEVFS_USER_CREATED; + + DEVFS_NODE(*ap->a_vpp)->symlink_namelen = targetlen; + DEVFS_NODE(*ap->a_vpp)->symlink_name = kmalloc(targetlen + 1, M_DEVFS, M_WAITOK); + memcpy(DEVFS_NODE(*ap->a_vpp)->symlink_name, ap->a_target, targetlen); + DEVFS_NODE(*ap->a_vpp)->symlink_name[targetlen] = '\0'; + cache_setunresolved(ap->a_nch); + //problematic to use cache_* inside lockmgr() ? Probably not... 
+ cache_setvp(ap->a_nch, *ap->a_vpp); + } + lockmgr(&devfs_lock, LK_RELEASE); +out: + return ((*ap->a_vpp == NULL) ? ENOTDIR : 0); + +} + + +static int +devfs_nremove(struct vop_nremove_args *ap) +{ + struct devfs_node *node; + struct namecache *ncp; + //struct vnode *vp = NULL; + int error = ENOENT; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove() called!\n"); + + ncp = ap->a_nch->ncp; + + lockmgr(&devfs_lock, LK_EXCLUSIVE); + + if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) && + (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove: ap->a_dvp is not a dir!!!\n"); + goto out; + } + + TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link) { + if (ncp->nc_nlen == node->d_dir.d_namlen) { + if (!memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen)) { + // allow only removal of user created stuff (e.g. symlinks) + if ((node->flags & DEVFS_USER_CREATED) == 0) { + error = EPERM; + goto out; + } else { + if (node->v_node) + cache_inval_vp(node->v_node, CINV_DESTROY); + + devfs_unlinkp(node); + error = 0; + break; + } + } + } + } + + cache_setunresolved(ap->a_nch); + cache_setvp(ap->a_nch, NULL); + //cache_inval_vp(node->v_node, CINV_DESTROY); + +out: + lockmgr(&devfs_lock, LK_RELEASE); + //vrele(ap->a_dvp); + //vput(ap->a_dvp); + return error; +} + + +static int +devfs_spec_open(struct vop_open_args *ap) +{ + struct vnode *vp = ap->a_vp; + cdev_t dev, ndev = NULL; + struct devfs_node *node = NULL; + int error = 0; + size_t len; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called\n"); + + if (DEVFS_NODE(vp)) { + if (DEVFS_NODE(vp)->d_dev == NULL) + return ENXIO; + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1-\n"); + + if ((dev = vp->v_rdev) == NULL) + return ENXIO; + + if (DEVFS_NODE(vp) && ap->a_fp) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n"); + lockmgr(&devfs_lock, LK_EXCLUSIVE); + len = DEVFS_NODE(vp)->d_dir.d_namlen; + if (!(devfs_clone(DEVFS_NODE(vp)->d_dir.d_name, 
&len, &ndev, 1, ap->a_cred))) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.2- |%s|\n", ndev->si_name); + + dev = ndev; + reference_dev(dev); + devfs_link_dev(dev); + node = devfs_create_device_node(DEVFS_MNTDATA(vp->v_mount)->root_node, dev, NULL, NULL); + //node = devfs_allocp(Pdev, ndev->si_name, DEVFS_NODE(vp)->parent, vp->v_mount, dev); + + devfs_debug(DEVFS_DEBUG_DEBUG, "parent here is: %s, node is: |%s|\n", (DEVFS_NODE(vp)->parent->node_type == Proot)?"ROOT!":DEVFS_NODE(vp)->parent->d_dir.d_name, node->d_dir.d_name); + devfs_debug(DEVFS_DEBUG_DEBUG, "test: %s\n", ((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(DEVFS_NODE(vp)->parent), devfs_node_head)))->d_dir.d_name); + + node->flags |= DEVFS_CLONED; + devfs_allocv(&vp, node); + + ap->a_vp = vp; + + //XXX: propagate to other devfs mounts? + } + lockmgr(&devfs_lock, LK_RELEASE); + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called on %s! \n", dev->si_name); + /* + * Make this field valid before any I/O in ->d_open + */ + if (!dev->si_iosize_max) + dev->si_iosize_max = DFLTPHYS; + + if (dev_dflags(dev) & D_TTY) + vp->v_flag |= VISTTY; + + vn_unlock(vp); + error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + + if (error) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() error out: %x\n", error); + if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED)) + vput(vp); + return error; + } + + + if (dev_dflags(dev) & D_TTY) { + if (dev->si_tty) { + struct tty *tp; + tp = dev->si_tty; + if (!tp->t_stop) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: no t_stop\n"); + tp->t_stop = nottystop; + } + } + } + + + if (vn_isdisk(vp, NULL)) { + if (!dev->si_bsize_phys) + dev->si_bsize_phys = DEV_BSIZE; + vinitvmio(vp, IDX_TO_OFF(INT_MAX)); + } + + vop_stdopen(ap); + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); + + if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED)) + vn_unlock(vp); + + /* Ugly 
pty magic, to make pty devices appear once they are opened */ + if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY)) + DEVFS_NODE(vp)->flags &= ~DEVFS_INVISIBLE; + + if (ap->a_fp) { + ap->a_fp->f_type = DTYPE_VNODE; + ap->a_fp->f_flag = ap->a_mode & FMASK; + ap->a_fp->f_ops = &devfs_dev_fileops; + ap->a_fp->f_data = vp; + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -end:3-\n"); + + return 0; +} + + +static int +devfs_spec_close(struct vop_close_args *ap) +{ + struct proc *p = curproc; + struct vnode *vp = ap->a_vp; + cdev_t dev = vp->v_rdev; + int error = 0; + int needrelock; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() called on %s! \n", dev->si_name); + + /* + * A couple of hacks for devices and tty devices. The + * vnode ref count cannot be used to figure out the + * last close, but we can use v_opencount now that + * revoke works properly. + * + * Detect the last close on a controlling terminal and clear + * the session (half-close). + */ + if (dev) + reference_dev(dev); + + if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) { + p->p_session->s_ttyvp = NULL; + vrele(vp); + } + + /* + * Vnodes can be opened and closed multiple times. Do not really + * close the device unless (1) it is being closed forcibly, + * (2) the device wants to track closes, or (3) this is the last + * vnode doing its last close on the device. + * + * XXX the VXLOCK (force close) case can leave vnodes referencing + * a closed device. This might not occur now that our revoke is + * fixed. 
+ */ + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n"); + if (dev && ((vp->v_flag & VRECLAIMED) || + (dev_dflags(dev) & D_TRACKCLOSE) || + (vp->v_opencount == 1))) { + needrelock = 0; + if (vn_islocked(vp)) { + needrelock = 1; + vn_unlock(vp); + } + error = dev_dclose(dev, ap->a_fflag, S_IFCHR); + if (DEVFS_NODE(vp) && (DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close: last of the cloned ones, so delete node %s\n", dev->si_name); + devfs_unlinkp(DEVFS_NODE(vp)); + devfs_freep(DEVFS_NODE(vp)); + devfs_unlink_dev(dev); + release_dev(dev); + devfs_destroy_cdev(dev); + } + /* Ugly pty magic, to make pty devices disappear again once they are closed */ + if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY)) + DEVFS_NODE(vp)->flags |= DEVFS_INVISIBLE; + + if (needrelock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + } else { + error = 0; + } + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n"); + /* + * Track the actual opens and closes on the vnode. The last close + * disassociates the rdev. If the rdev is already disassociated or the + * opencount is already 0, the vnode might have been revoked and no + * further opencount tracking occurs. + */ + if (dev) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3- \n"); + if (vp->v_opencount == 1) { + //vp->v_rdev = 0; + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3.5- \n"); + } + release_dev(dev); + } + if (vp->v_opencount > 0) { + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -4- \n"); + vop_stdclose(ap); + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -5- \n"); + } + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -end:6- \n"); + return(error); + +} + + +static int +devfs_specf_close(struct file *fp) +{ + int error; + struct vnode *vp = (struct vnode *)fp->f_data; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_close() called! 
\n"); + get_mplock(); + fp->f_ops = &badfileops; + + error = vn_close(vp, fp->f_flag); + rel_mplock(); + + return (error); +} + + +/* + * Device-optimized file table vnode read routine. + * + * This bypasses the VOP table and talks directly to the device. Most + * filesystems just route to specfs and can make this optimization. + * + * MPALMOSTSAFE - acquires mplock + */ +static int +devfs_specf_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) +{ + struct vnode *vp; + int ioflag; + int error; + cdev_t dev; + + get_mplock(); + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! \n"); + KASSERT(uio->uio_td == curthread, + ("uio_td %p is not td %p", uio->uio_td, curthread)); + + vp = (struct vnode *)fp->f_data; + if (vp == NULL || vp->v_type == VBAD) { + error = EBADF; + goto done; + } + + if ((dev = vp->v_rdev) == NULL) { + error = EBADF; + goto done; + } + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! for dev %s\n", dev->si_name); + + reference_dev(dev); + + if (uio->uio_resid == 0) { + error = 0; + goto done; + } + if ((flags & O_FOFFSET) == 0) + uio->uio_offset = fp->f_offset; + + ioflag = 0; + if (flags & O_FBLOCKING) { + /* ioflag &= ~IO_NDELAY; */ + } else if (flags & O_FNONBLOCKING) { + ioflag |= IO_NDELAY; + } else if (fp->f_flag & FNONBLOCK) { + ioflag |= IO_NDELAY; + } + if (flags & O_FBUFFERED) { + /* ioflag &= ~IO_DIRECT; */ + } else if (flags & O_FUNBUFFERED) { + ioflag |= IO_DIRECT; + } else if (fp->f_flag & O_DIRECT) { + ioflag |= IO_DIRECT; + } + ioflag |= sequential_heuristic(uio, fp); + + error = dev_dread(dev, uio, ioflag); + + release_dev(dev); + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); + if ((flags & O_FOFFSET) == 0) + fp->f_offset = uio->uio_offset; + fp->f_nextoff = uio->uio_offset; +done: + rel_mplock(); + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read finished\n"); + return (error); +} + + +static int +devfs_specf_write(struct file *fp, struct uio *uio, struct ucred *cred, int 
flags) +{ + struct vnode *vp; + int ioflag; + int error; + cdev_t dev; + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! \n"); + get_mplock(); + KASSERT(uio->uio_td == curthread, + ("uio_td %p is not p %p", uio->uio_td, curthread)); + + vp = (struct vnode *)fp->f_data; + if (vp == NULL || vp->v_type == VBAD) { + error = EBADF; + goto done; + } + if (vp->v_type == VREG) + bwillwrite(uio->uio_resid); + vp = (struct vnode *)fp->f_data; + + if ((dev = vp->v_rdev) == NULL) { + error = EBADF; + goto done; + } + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! for dev %s\n", dev->si_name); + reference_dev(dev); + + if ((flags & O_FOFFSET) == 0) + uio->uio_offset = fp->f_offset; + + ioflag = IO_UNIT; + if (vp->v_type == VREG && + ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) { + ioflag |= IO_APPEND; + } + + if (flags & O_FBLOCKING) { + /* ioflag &= ~IO_NDELAY; */ + } else if (flags & O_FNONBLOCKING) { + ioflag |= IO_NDELAY; + } else if (fp->f_flag & FNONBLOCK) { + ioflag |= IO_NDELAY; + } + if (flags & O_FBUFFERED) { + /* ioflag &= ~IO_DIRECT; */ + } else if (flags & O_FUNBUFFERED) { + ioflag |= IO_DIRECT; + } else if (fp->f_flag & O_DIRECT) { + ioflag |= IO_DIRECT; + } + if (flags & O_FASYNCWRITE) { + /* ioflag &= ~IO_SYNC; */ + } else if (flags & O_FSYNCWRITE) { + ioflag |= IO_SYNC; + } else if (fp->f_flag & O_FSYNC) { + ioflag |= IO_SYNC; + } + + if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)) + ioflag |= IO_SYNC; + ioflag |= sequential_heuristic(uio, fp); + + error = dev_dwrite(dev, uio, ioflag); + + release_dev(dev); + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->mtime); + + if ((flags & O_FOFFSET) == 0) + fp->f_offset = uio->uio_offset; + fp->f_nextoff = uio->uio_offset; +done: + rel_mplock(); + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write done\n"); + return (error); +} + + +static int +devfs_specf_stat(struct file *fp, struct stat *sb, struct ucred *cred) +{ + struct vnode *vp; + int error; + + 
devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_stat() called\n"); + + get_mplock(); + vp = (struct vnode *)fp->f_data; + error = vn_stat(vp, sb, cred); + if (error) { + rel_mplock(); + return (error); + } + + struct vattr vattr; + struct vattr *vap; + u_short mode; + cdev_t dev; + + vap = &vattr; + error = VOP_GETATTR(vp, vap); + if (error) { + rel_mplock(); + return (error); + } + + /* + * Zero the spare stat fields + */ + sb->st_lspare = 0; + sb->st_qspare = 0; + + /* + * Copy from vattr table ... or not in case it's a cloned device + */ + if (vap->va_fsid != VNOVAL) + sb->st_dev = vap->va_fsid; + else + sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; + + sb->st_ino = vap->va_fileid; + + mode = vap->va_mode; + mode |= S_IFCHR; + sb->st_mode = mode; + + if (vap->va_nlink > (nlink_t)-1) + sb->st_nlink = (nlink_t)-1; + else + sb->st_nlink = vap->va_nlink; + sb->st_uid = vap->va_uid; + sb->st_gid = vap->va_gid; + sb->st_rdev = 0; + sb->st_size = vap->va_size; + sb->st_atimespec = vap->va_atime; + sb->st_mtimespec = vap->va_mtime; + sb->st_ctimespec = vap->va_ctime; + + /* + * A VCHR and VBLK device may track the last access and last modified + * time independantly of the filesystem. This is particularly true + * because device read and write calls may bypass the filesystem. + */ + if (vp->v_type == VCHR || vp->v_type == VBLK) { + dev = vp->v_rdev; + if (dev != NULL) { + if (dev->si_lastread) { + sb->st_atimespec.tv_sec = dev->si_lastread; + sb->st_atimespec.tv_nsec = 0; + } + if (dev->si_lastwrite) { + sb->st_atimespec.tv_sec = dev->si_lastwrite; + sb->st_atimespec.tv_nsec = 0; + } + } + } + + /* + * According to www.opengroup.org, the meaning of st_blksize is + * "a filesystem-specific preferred I/O block size for this + * object. In some filesystem types, this may vary from file + * to file" + * Default to PAGE_SIZE after much discussion. 
+ */ + + sb->st_blksize = PAGE_SIZE; + + sb->st_flags = vap->va_flags; + + error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0); + if (error) + sb->st_gen = 0; + else + sb->st_gen = (u_int32_t)vap->va_gen; + + sb->st_blocks = vap->va_bytes / S_BLKSIZE; + sb->st_fsmid = vap->va_fsmid; + + rel_mplock(); + return (0); +} + + +static int +devfs_specf_kqfilter(struct file *fp, struct knote *kn) +{ + struct vnode *vp; + //int ioflag; + int error; + cdev_t dev; + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_kqfilter() called! \n"); + + get_mplock(); + + vp = (struct vnode *)fp->f_data; + if (vp == NULL || vp->v_type == VBAD) { + error = EBADF; + goto done; + } + + if ((dev = vp->v_rdev) == NULL) { + error = EBADF; + goto done; + } + reference_dev(dev); + + error = dev_dkqfilter(dev, kn); + + release_dev(dev); + + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); +done: + rel_mplock(); + return (error); +} + + +static int +devfs_specf_poll(struct file *fp, int events, struct ucred *cred) +{ + struct vnode *vp; + //int ioflag; + int error; + cdev_t dev; + + //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_poll() called! \n"); + + get_mplock(); + + vp = (struct vnode *)fp->f_data; + if (vp == NULL || vp->v_type == VBAD) { + error = EBADF; + goto done; + } + + if ((dev = vp->v_rdev) == NULL) { + error = EBADF; + goto done; + } + reference_dev(dev); + error = dev_dpoll(dev, events); + + release_dev(dev); + + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); +done: + rel_mplock(); + return (error); +} + + +/* + * MPALMOSTSAFE - acquires mplock + */ +static int +devfs_specf_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred) +{ + struct vnode *vp = ((struct vnode *)fp->f_data); + struct vnode *ovp; + //struct vattr vattr; + cdev_t dev; + int error; + struct fiodname_args *name_args; + size_t namlen; + const char *name; + + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! 
 \n");

	get_mplock();

	if ((dev = vp->v_rdev) == NULL) {
		error = EBADF;		/* device was revoked */
		goto out;
	}
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! for dev %s\n", dev->si_name);

	if (!(dev_dflags(dev) & D_TTY))
		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called on %s! com is: %x\n", dev->si_name, com);

	/*
	 * FIODTYPE and FIODNAME are answered directly by devfs without
	 * going through the driver's ioctl entry point.
	 */
	if (com == FIODTYPE) {
		*(int *)data = dev_dflags(dev) & D_TYPEMASK;
		error = 0;
		goto out;
	} else if (com == FIODNAME) {
		name_args = (struct fiodname_args *)data;
		name = dev->si_name;
		namlen = strlen(name) + 1;	/* include NUL terminator */

		devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl, got: FIODNAME for %s\n", name);

		if (namlen <= name_args->len)
			error = copyout(dev->si_name, name_args->name, namlen);
		else
			error = EINVAL;		/* caller's buffer too small */

		//name_args->len = namlen; //need _IOWR to enable this
		devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl stuff: error: %d\n", error);
		goto out;
	}

	/* Everything else is forwarded to the driver. */
	reference_dev(dev);
	error = dev_dioctl(dev, com, data, fp->f_flag, ucred);
	release_dev(dev);
	if (DEVFS_NODE(vp)) {
		nanotime(&DEVFS_NODE(vp)->atime);
		nanotime(&DEVFS_NODE(vp)->mtime);
	}

	if (com == TIOCSCTTY)
		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: got TIOCSCTTY on %s\n", dev->si_name);
	if (error == 0 && com == TIOCSCTTY) {
		/*
		 * The driver accepted TIOCSCTTY; make this vnode the
		 * session's controlling terminal.  Reference ordering
		 * matters: vref() the new tty before vrele() of the old
		 * one in case they alias.
		 */
		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: dealing with TIOCSCTTY on %s\n", dev->si_name);
		struct proc *p = curthread->td_proc;
		struct session *sess;
		if (p == NULL) {
			/* No process context; cannot assign a controlling tty. */
			error = ENOTTY;
			goto out;
		}
		sess = p->p_session;
		/* Do nothing if reassigning same control tty */
		if (sess->s_ttyvp == vp) {
			error = 0;
			goto out;
		}
		/* Get rid of reference to old control tty */
		ovp = sess->s_ttyvp;
		vref(vp);
		sess->s_ttyvp = vp;
		if (ovp)
			vrele(ovp);
	}

out:
	rel_mplock();
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() finished! \n");
	return (error);
}


/*
 * Synchronize a block device's dirty buffers to disk.  Character
 * devices and other non-disks have nothing cached, so there is
 * nothing to flush.
 */
static int
devfs_spec_fsync(struct vop_fsync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error;

	if (!vn_isdisk(vp, NULL))
		return (0);

	/*
	 * Flush all dirty buffers associated with a block device.
	 */
	error = vfsync(vp, ap->a_waitfor, 10000, NULL, NULL);
	return (error);
}

/*
 * Vnode read: hand the uio straight to the driver.  The vnode lock is
 * dropped around the device call because it may block indefinitely
 * (e.g. a tty waiting for input).
 */
static int
devfs_spec_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	cdev_t dev;
	int error;

	vp = ap->a_vp;
	dev = vp->v_rdev;
	uio = ap->a_uio;

	if (dev == NULL)		/* device was revoked */
		return (EBADF);
	if (uio->uio_resid == 0)
		return (0);

	vn_unlock(vp);
	error = dev_dread(dev, uio, ap->a_ioflag);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	if (DEVFS_NODE(vp))
		nanotime(&DEVFS_NODE(vp)->atime);

	return (error);
}

/*
 * Vnode op for write
 *
 * spec_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
 *	      struct ucred *a_cred)
 */
/* ARGSUSED */
static int
devfs_spec_write(struct vop_write_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	cdev_t dev;
	int error;

	vp = ap->a_vp;
	dev = vp->v_rdev;
	uio = ap->a_uio;

	KKASSERT(uio->uio_segflg != UIO_NOCOPY);

	if (dev == NULL)		/* device was revoked */
		return (EBADF);

	/* Drop the vnode lock around the potentially-blocking device call. */
	vn_unlock(vp);
	error = dev_dwrite(dev, uio, ap->a_ioflag);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	if (DEVFS_NODE(vp))
		nanotime(&DEVFS_NODE(vp)->mtime);

	return (error);
}

/*
 * Device ioctl operation.
+ * + * spec_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data, + * int a_fflag, struct ucred *a_cred) + */ +/* ARGSUSED */ +static int +devfs_spec_ioctl(struct vop_ioctl_args *ap) +{ + cdev_t dev; + struct vnode *vp = ap->a_vp; + + if ((dev = vp->v_rdev) == NULL) + return (EBADF); /* device was revoked */ + if ( ap->a_command == TIOCSCTTY ) + devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_*SPEC*_ioctl: got TIOCSCTTY\n"); + + if (DEVFS_NODE(vp)) { + nanotime(&DEVFS_NODE(vp)->atime); + nanotime(&DEVFS_NODE(vp)->mtime); + } + + return (dev_dioctl(dev, ap->a_command, ap->a_data, + ap->a_fflag, ap->a_cred)); +} + +/* + * spec_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred) + */ +/* ARGSUSED */ +static int +devfs_spec_poll(struct vop_poll_args *ap) +{ + cdev_t dev; + struct vnode *vp = ap->a_vp; + + if ((dev = vp->v_rdev) == NULL) + return (EBADF); /* device was revoked */ + + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); + + return (dev_dpoll(dev, ap->a_events)); +} + +/* + * spec_kqfilter(struct vnode *a_vp, struct knote *a_kn) + */ +/* ARGSUSED */ +static int +devfs_spec_kqfilter(struct vop_kqfilter_args *ap) +{ + cdev_t dev; + struct vnode *vp = ap->a_vp; + + if ((dev = vp->v_rdev) == NULL) + return (EBADF); /* device was revoked */ + + if (DEVFS_NODE(vp)) + nanotime(&DEVFS_NODE(vp)->atime); + + return (dev_dkqfilter(dev, ap->a_kn)); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/* + * Convert a vnode strategy call into a device strategy call. Vnode strategy + * calls are not limited to device DMA limits so we have to deal with the + * case. 
 *
 * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
static int
devfs_spec_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	struct buf *nbp;
	struct vnode *vp;
	struct mount *mp;
	int chunksize;
	int maxiosize;

	/* Let the buffer dependency subsystem see writes before they start. */
	if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
		buf_start(bp);

	/*
	 * Collect statistics on synchronous and asynchronous read
	 * and write counts for disks that have associated filesystems.
	 */
	vp = ap->a_vp;
	KKASSERT(vp->v_rdev != NULL);	/* XXX */
	if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
		if (bp->b_cmd == BUF_CMD_READ) {
			//XXX: no idea what has changed here...
			if (bp->b_flags & BIO_SYNC)
				mp->mnt_stat.f_syncreads++;
			else
				mp->mnt_stat.f_asyncreads++;
		} else {
			if (bp->b_flags & BIO_SYNC)
				mp->mnt_stat.f_syncwrites++;
			else
				mp->mnt_stat.f_asyncwrites++;
		}
	}

	/*
	 * Device iosize limitations only apply to read and write.  Shortcut
	 * the I/O if it fits.
	 */
	if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) {
		devfs_debug(DEVFS_DEBUG_DEBUG, "%s: si_iosize_max not set!\n", dev_dname(vp->v_rdev));
		maxiosize = MAXPHYS;
	}
#if SPEC_CHAIN_DEBUG & 2
	maxiosize = 4096;
#endif
	if (bp->b_bcount <= maxiosize ||
	    (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
		dev_dstrategy_chain(vp->v_rdev, bio);
		return (0);
	}

	/*
	 * Clone the buffer and set up an I/O chain to chunk up the I/O.
	 *
	 * NOTE(review): nbp is torn down with plain kfree() in the
	 * completion handler after BUF_LOCKINIT()/initbufbio() here --
	 * confirm no matching uninit/teardown call is required.
	 */
	nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
	initbufbio(nbp);
	buf_dep_init(nbp);
	BUF_LOCKINIT(nbp);
	BUF_LOCK(nbp, LK_EXCLUSIVE);
	BUF_KERNPROC(nbp);
	nbp->b_vp = vp;
	nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
	nbp->b_data = bp->b_data;
	nbp->b_bio1.bio_done = devfs_spec_strategy_done;
	nbp->b_bio1.bio_offset = bio->bio_offset;
	nbp->b_bio1.bio_caller_info1.ptr = bio;	/* remember the original bio */

	/*
	 * Start the first transfer
	 */
	if (vn_isdisk(vp, NULL))
		chunksize = vp->v_rdev->si_bsize_phys;
	else
		chunksize = DEV_BSIZE;
	/* round maxiosize down to a multiple of the physical block size */
	chunksize = maxiosize / chunksize * chunksize;
#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy chained I/O chunksize=%d\n", chunksize);
#endif
	nbp->b_cmd = bp->b_cmd;
	nbp->b_bcount = chunksize;
	nbp->b_bufsize = chunksize;	/* used to detect a short I/O */
	nbp->b_bio1.bio_caller_info2.index = chunksize;

#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
		bp, 0, bp->b_bcount, nbp->b_bcount);
#endif

	dev_dstrategy(vp->v_rdev, &nbp->b_bio1);

	if (DEVFS_NODE(vp)) {
		nanotime(&DEVFS_NODE(vp)->atime);
		nanotime(&DEVFS_NODE(vp)->mtime);
	}

	return (0);
}

/*
 * Chunked up transfer completion routine - chain transfers until done
 */
static
void
devfs_spec_strategy_done(struct bio *nbio)
{
	struct buf *nbp = nbio->bio_buf;
	struct bio *bio = nbio->bio_caller_info1.ptr;	/* original bio */
	struct buf *bp = bio->bio_buf;			/* original bp */
	int chunksize = nbio->bio_caller_info2.index;	/* chunking */
	int boffset = nbp->b_data - bp->b_data;		/* bytes completed so far */

	if (nbp->b_flags & B_ERROR) {
		/*
		 * An error terminates the chain, propagate the error back
		 * to the original bp
		 */
		bp->b_flags |= B_ERROR;
		bp->b_error = nbp->b_error;
		bp->b_resid = bp->b_bcount - boffset +
			      (nbp->b_bcount - nbp->b_resid);
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p error %d bcount %d/%d\n",
			bp, bp->b_error, bp->b_bcount,
			bp->b_bcount - bp->b_resid);
#endif
		kfree(nbp, M_DEVBUF);
		biodone(bio);
	} else if (nbp->b_resid) {
		/*
		 * A short read or write terminates the chain
		 */
		bp->b_error = nbp->b_error;
		bp->b_resid = bp->b_bcount - boffset +
			      (nbp->b_bcount - nbp->b_resid);
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(1) bcount %d/%d\n",
			bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
#endif
		kfree(nbp, M_DEVBUF);
		biodone(bio);
	} else if (nbp->b_bcount != nbp->b_bufsize) {
		/*
		 * A short read or write can also occur by truncating b_bcount
		 */
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(2) bcount %d/%d\n",
			bp, nbp->b_bcount + boffset, bp->b_bcount);
#endif
		bp->b_error = 0;
		bp->b_bcount = nbp->b_bcount + boffset;
		bp->b_resid = nbp->b_resid;
		kfree(nbp, M_DEVBUF);
		biodone(bio);
	} else if (nbp->b_bcount + boffset == bp->b_bcount) {
		/*
		 * No more data terminates the chain
		 */
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p finished bcount %d\n",
			bp, bp->b_bcount);
#endif
		bp->b_error = 0;
		bp->b_resid = 0;
		kfree(nbp, M_DEVBUF);
		biodone(bio);
	} else {
		/*
		 * Continue the chain: slide the data window forward by one
		 * chunk and reissue the same cloned buffer.
		 */
		boffset += nbp->b_bcount;
		nbp->b_data = bp->b_data + boffset;
		nbp->b_bcount = bp->b_bcount - boffset;
		if (nbp->b_bcount > chunksize)
			nbp->b_bcount = chunksize;
		nbp->b_bio1.bio_done = devfs_spec_strategy_done;
		nbp->b_bio1.bio_offset = bio->bio_offset + boffset;

#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
			bp, boffset, bp->b_bcount, nbp->b_bcount);
#endif

		dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
	}
}

/*
 * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
 */
static int
devfs_spec_freeblks(struct vop_freeblks_args *ap)
{
	struct buf *bp;

	/*
	 * XXX: This assumes that strategy does the deed right away.
	 * XXX: this may not be TRTTD.
	 */
	KKASSERT(ap->a_vp->v_rdev != NULL);
	if ((dev_dflags(ap->a_vp->v_rdev) & D_CANFREE) == 0)
		return (0);
	bp = geteblk(ap->a_length);
	bp->b_cmd = BUF_CMD_FREEBLKS;
	bp->b_bio1.bio_offset = ap->a_offset;
	bp->b_bcount = ap->a_length;
	dev_dstrategy(ap->a_vp->v_rdev, &bp->b_bio1);
	return (0);
}

/*
 * Implement degenerate case where the block requested is the block
 * returned, and assume that the entire device is contiguous in regards
 * to the contiguous block range (runp and runb).
 *
 * spec_bmap(struct vnode *a_vp, off_t a_loffset,
 *	     off_t *a_doffsetp, int *a_runp, int *a_runb)
 */
static int
devfs_spec_bmap(struct vop_bmap_args *ap)
{
	if (ap->a_doffsetp != NULL)
		*ap->a_doffsetp = ap->a_loffset;	/* identity mapping */
	if (ap->a_runp != NULL)
		*ap->a_runp = MAXBSIZE;
	if (ap->a_runb != NULL) {
		/* cannot run backwards past offset 0 */
		if (ap->a_loffset < MAXBSIZE)
			*ap->a_runb = (int)ap->a_loffset;
		else
			*ap->a_runb = MAXBSIZE;
	}
	return (0);
}


/*
 * Special device advisory byte-level locks.
 *
 * spec_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
 *		struct flock *a_fl, int a_flags)
 */
/* ARGSUSED */
static int
devfs_spec_advlock(struct vop_advlock_args *ap)
{
	/* Advisory locking is not supported on device nodes. */
	return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP);
}

/*
 * biodone callback for getpages: mark the buffer complete and wake
 * the thread sleeping on it in devfs_spec_getpages().
 */
static void
devfs_spec_getpages_iodone(struct bio *bio)
{
	bio->bio_buf->b_cmd = BUF_CMD_DONE;
	wakeup(bio->bio_buf);
}

/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bp->b_runningbufspace = size;
	if (size) {
		runningbufspace += bp->b_runningbufspace;
		++runningbufcount;
	}

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;	/* driver set B_ERROR without a code */
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garbage.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		if (nextoff <= nread) {
			/* Page fully covered by the data read. */
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_validclean()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_validclean(m, 0, nread - toff);
		} else {
			/* Page entirely beyond the data actually read. */
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_WANTED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
				vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		/* The requested page was not made valid: report the failure. */
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
			size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
			nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	return VM_PAGER_OK;
}

/*
 * Compute a sequential-access hint for a device read/write.
 *
 * Returns fp->f_seqcount shifted by IO_SEQSHIFT (for OR-ing into the
 * ioflag), growing while accesses remain back-to-back and collapsing
 * quickly once they do not.
 */
static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{
	/*
	 * Sequential heuristic - detect sequential operation
	 */
	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
	    uio->uio_offset == fp->f_nextoff) {
		int tmpseq = fp->f_seqcount;
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
		if (tmpseq > IO_SEQMAX)
			tmpseq = IO_SEQMAX;
		fp->f_seqcount = tmpseq;
		return(fp->f_seqcount << IO_SEQSHIFT);
	}

	/*
	 * Not sequential, quick draw-down of seqcount
	 */
	if (fp->f_seqcount > 1)
		fp->f_seqcount = 1;
	else
		fp->f_seqcount = 0;
	return(0);
}