2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
37 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
42 #include <sys/signalvar.h>
43 #include <sys/vnode.h>
45 #include <sys/mount.h>
47 #include <sys/fcntl.h>
48 #include <sys/namei.h>
49 #include <sys/dirent.h>
50 #include <sys/malloc.h>
53 #include <vm/vm_pager.h>
54 #include <vm/vm_zone.h>
55 #include <vm/vm_object.h>
56 #include <sys/filio.h>
57 #include <sys/ttycom.h>
59 #include <sys/diskslice.h>
60 #include <sys/devfs.h>
61 #include <sys/pioctl.h>
63 #include <machine/limits.h>
66 #include <sys/sysref2.h>
67 #include <sys/mplock2.h>
68 #include <vm/vm_page2.h>
/*
 * Forward declarations for the devfs VOP (vnode) and fileops entry points
 * defined later in this file.  DEVFS_BADOP is the catch-all for operations
 * devfs does not support.
 */
70 MALLOC_DECLARE(M_DEVFS);
71 #define DEVFS_BADOP (void *)devfs_badop
/* Generic / regular-node vnode operations. */
73 static int devfs_badop(struct vop_generic_args *);
74 static int devfs_access(struct vop_access_args *);
75 static int devfs_inactive(struct vop_inactive_args *);
76 static int devfs_reclaim(struct vop_reclaim_args *);
77 static int devfs_readdir(struct vop_readdir_args *);
78 static int devfs_getattr(struct vop_getattr_args *);
79 static int devfs_setattr(struct vop_setattr_args *);
80 static int devfs_readlink(struct vop_readlink_args *);
81 static int devfs_print(struct vop_print_args *);
/* Namecache operations (resolve, dotdot lookup, symlink, remove). */
83 static int devfs_nresolve(struct vop_nresolve_args *);
84 static int devfs_nlookupdotdot(struct vop_nlookupdotdot_args *);
85 static int devfs_nsymlink(struct vop_nsymlink_args *);
86 static int devfs_nremove(struct vop_nremove_args *);
/* Device-special (character device) vnode operations. */
88 static int devfs_spec_open(struct vop_open_args *);
89 static int devfs_spec_close(struct vop_close_args *);
90 static int devfs_spec_fsync(struct vop_fsync_args *);
92 static int devfs_spec_read(struct vop_read_args *);
93 static int devfs_spec_write(struct vop_write_args *);
94 static int devfs_spec_ioctl(struct vop_ioctl_args *);
95 static int devfs_spec_poll(struct vop_poll_args *);
96 static int devfs_spec_kqfilter(struct vop_kqfilter_args *);
97 static int devfs_spec_strategy(struct vop_strategy_args *);
98 static void devfs_spec_strategy_done(struct bio *);
99 static int devfs_spec_freeblks(struct vop_freeblks_args *);
100 static int devfs_spec_bmap(struct vop_bmap_args *);
101 static int devfs_spec_advlock(struct vop_advlock_args *);
102 static void devfs_spec_getpages_iodone(struct bio *);
103 static int devfs_spec_getpages(struct vop_getpages_args *);
/* fileops entry points used when a device file is accessed via a struct file. */
106 static int devfs_specf_close(struct file *);
107 static int devfs_specf_read(struct file *, struct uio *, struct ucred *, int);
108 static int devfs_specf_write(struct file *, struct uio *, struct ucred *, int);
109 static int devfs_specf_stat(struct file *, struct stat *, struct ucred *);
110 static int devfs_specf_kqfilter(struct file *, struct knote *);
111 static int devfs_specf_poll(struct file *, int, struct ucred *);
112 static int devfs_specf_ioctl(struct file *, u_long, caddr_t,
113 struct ucred *, struct sysmsg *);
114 static __inline int sequential_heuristic(struct uio *, struct file *);
/* Global devfs lock, defined elsewhere; taken by most operations below. */
116 extern struct lock devfs_lock;
119 * devfs vnode operations for regular files
/*
 * VOP table for regular (non-device) devfs nodes: directories and
 * symlinks.  Operations that make no sense on devfs (create, mkdir,
 * rename, read/write, ...) are routed to DEVFS_BADOP.
 */
121 struct vop_ops devfs_vnode_norm_vops = {
122 .vop_default = vop_defaultop,
123 .vop_access = devfs_access,
124 .vop_advlock = DEVFS_BADOP,
125 .vop_bmap = DEVFS_BADOP,
126 .vop_close = vop_stdclose,
127 .vop_getattr = devfs_getattr,
128 .vop_inactive = devfs_inactive,
129 .vop_ncreate = DEVFS_BADOP,
130 .vop_nresolve = devfs_nresolve,
131 .vop_nlookupdotdot = devfs_nlookupdotdot,
132 .vop_nlink = DEVFS_BADOP,
133 .vop_nmkdir = DEVFS_BADOP,
134 .vop_nmknod = DEVFS_BADOP,
135 .vop_nremove = devfs_nremove,
136 .vop_nrename = DEVFS_BADOP,
137 .vop_nrmdir = DEVFS_BADOP,
/* symlink creation/removal is supported so user-created links work */
138 .vop_nsymlink = devfs_nsymlink,
139 .vop_open = vop_stdopen,
140 .vop_pathconf = vop_stdpathconf,
141 .vop_print = devfs_print,
142 .vop_read = DEVFS_BADOP,
143 .vop_readdir = devfs_readdir,
144 .vop_readlink = devfs_readlink,
145 .vop_reclaim = devfs_reclaim,
146 .vop_setattr = devfs_setattr,
147 .vop_write = DEVFS_BADOP,
148 .vop_ioctl = DEVFS_BADOP
152 * devfs vnode operations for character devices
/*
 * VOP table for device nodes.  I/O, ioctl, poll/kqfilter and strategy
 * are forwarded to the underlying cdev via the devfs_spec_* wrappers;
 * directory-style operations are invalid on a device node.
 */
154 struct vop_ops devfs_vnode_dev_vops = {
155 .vop_default = vop_defaultop,
156 .vop_access = devfs_access,
157 .vop_advlock = devfs_spec_advlock,
158 .vop_bmap = devfs_spec_bmap,
159 .vop_close = devfs_spec_close,
160 .vop_freeblks = devfs_spec_freeblks,
161 .vop_fsync = devfs_spec_fsync,
162 .vop_getattr = devfs_getattr,
163 .vop_getpages = devfs_spec_getpages,
164 .vop_inactive = devfs_inactive,
165 .vop_open = devfs_spec_open,
166 .vop_pathconf = vop_stdpathconf,
167 .vop_print = devfs_print,
168 .vop_poll = devfs_spec_poll,
169 .vop_kqfilter = devfs_spec_kqfilter,
170 .vop_read = devfs_spec_read,
171 .vop_readdir = DEVFS_BADOP,
172 .vop_readlink = DEVFS_BADOP,
173 .vop_reclaim = devfs_reclaim,
174 .vop_setattr = devfs_setattr,
175 .vop_strategy = devfs_spec_strategy,
176 .vop_write = devfs_spec_write,
177 .vop_ioctl = devfs_spec_ioctl
/* Exported pointer so other devfs code can reference the device vops table. */
180 struct vop_ops *devfs_vnode_dev_vops_p = &devfs_vnode_dev_vops;
/*
 * fileops installed on struct file for open device nodes (see
 * devfs_spec_open, which sets f_ops = &devfs_dev_fileops).  These bypass
 * the VOP layer and talk to the device directly.
 */
182 struct fileops devfs_dev_fileops = {
183 .fo_read = devfs_specf_read,
184 .fo_write = devfs_specf_write,
185 .fo_ioctl = devfs_specf_ioctl,
186 .fo_poll = devfs_specf_poll,
187 .fo_kqfilter = devfs_specf_kqfilter,
188 .fo_stat = devfs_specf_stat,
189 .fo_close = devfs_specf_close,
190 .fo_shutdown = nofo_shutdown
194 * These two functions are possibly temporary hacks for
195 * devices (aka the pty code) which want to control the
196 * node attributes themselves.
198 * XXX we may ultimately desire to simply remove the uid/gid/mode
199 * from the node entirely.
/*
 * Pull uid/gid/mode from the cdev into the devfs_node, but only when the
 * device has asked to override node attributes (SI_OVERRIDE set).
 */
202 node_sync_dev_get(struct devfs_node *node)
206 if ((dev = node->d_dev) && (dev->si_flags & SI_OVERRIDE)) {
207 node->uid = dev->si_uid;
208 node->gid = dev->si_gid;
209 node->mode = dev->si_perms;
/*
 * Inverse of node_sync_dev_get(): push the node's uid/gid/mode back into
 * the cdev when the device overrides node attributes (SI_OVERRIDE set).
 */
214 node_sync_dev_set(struct devfs_node *node)
218 if ((dev = node->d_dev) && (dev->si_flags & SI_OVERRIDE)) {
219 dev->si_uid = node->uid;
220 dev->si_gid = node->gid;
221 dev->si_perms = node->mode;
226 * generic entry point for unsupported operations
/* Installed via DEVFS_BADOP in the vop tables above. */
229 devfs_badop(struct vop_generic_args *ap)
/*
 * VOP_ACCESS: permission check.  Syncs attributes from the device first
 * (SI_OVERRIDE case) and then defers to the generic helper using the
 * node's uid/gid/mode/flags.
 */
236 devfs_access(struct vop_access_args *ap)
238 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
241 if (!devfs_node_is_accessible(node))
243 node_sync_dev_get(node);
244 error = vop_helper_access(ap, node->uid, node->gid,
245 node->mode, node->flags);
/*
 * VOP_INACTIVE: nodes that are gone (NULL) or no longer linked into the
 * topology take the early-out path here; the rest of the body is not
 * visible in this chunk.
 */
252 devfs_inactive(struct vop_inactive_args *ap)
254 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
256 if (node == NULL || (node->flags & DEVFS_NODE_LINKED) == 0)
/*
 * VOP_RECLAIM: detach the devfs_node from the vnode being reclaimed.
 * Acquires the global devfs lock if the caller does not already hold it
 * exclusively, drops the node if it is no longer linked, and releases
 * v_rdev / clears v_data (per the trailing comments).
 */
263 devfs_reclaim(struct vop_reclaim_args *ap)
265 struct devfs_node *node;
270 * Check if it is locked already. if not, we acquire the devfs lock
/*
 * BUGFIX: the original read
 *     if (!(lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE) {
 * where '!' binds before '==', so the 0/1 result of the negation was
 * compared against LK_EXCLUSIVE -- the lock-held test was inverted
 * nonsense.  The intended test is "lock not already held exclusively".
 */
272 if (lockstatus(&devfs_lock, curthread) != LK_EXCLUSIVE) {
273 lockmgr(&devfs_lock, LK_EXCLUSIVE);
280 * Get rid of the devfs_node if it is no longer linked into the
284 if ((node = DEVFS_NODE(vp)) != NULL) {
286 if ((node->flags & DEVFS_NODE_LINKED) == 0)
291 lockmgr(&devfs_lock, LK_RELEASE);
294 * v_rdev needs to be properly released using v_release_rdev
295 * Make sure v_data is NULL as well.
/*
 * VOP_READDIR: emit "." and ".." followed by the visible children of the
 * directory node, under the devfs lock.  Cookies are handed back to the
 * caller when a_ncookies is supplied; node->cookie doubles as the
 * directory offset.  Hidden/invisible nodes and links whose target is
 * hidden are skipped.
 */
304 devfs_readdir(struct vop_readdir_args *ap)
306 struct devfs_node *dnode = DEVFS_NODE(ap->a_vp);
307 struct devfs_node *node;
316 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readdir() called!\n");
/* reject nonsensical offsets before doing any work */
318 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
320 if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
323 if (!devfs_node_is_accessible(dnode)) {
328 lockmgr(&devfs_lock, LK_EXCLUSIVE);
330 saveoff = ap->a_uio->uio_offset;
332 if (ap->a_ncookies) {
333 ncookies = ap->a_uio->uio_resid / 16 + 1; /* Why / 16 ?? */
/* NOTE(review): allocation is a fixed 256 cookies regardless of the
 * computed ncookies -- presumably ncookies is clamped to 256 in lines
 * not visible here; confirm against the full source. */
336 cookies = kmalloc(256 * sizeof(off_t), M_TEMP, M_WAITOK);
/* directory access updates atime */
344 nanotime(&dnode->atime);
/* "." entry */
347 r = vop_write_dirent(&error, ap->a_uio, dnode->d_dir.d_ino,
352 cookies[cookie_index] = saveoff;
355 if (cookie_index == ncookies)
/* ".." entry: use the parent's inode when a parent exists */
361 r = vop_write_dirent(&error, ap->a_uio,
362 dnode->parent->d_dir.d_ino,
365 r = vop_write_dirent(&error, ap->a_uio,
372 cookies[cookie_index] = saveoff;
375 if (cookie_index == ncookies)
/* walk the children */
379 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) {
380 if ((node->flags & DEVFS_HIDDEN) ||
381 (node->flags & DEVFS_INVISIBLE)) {
386 * If the node type is a valid devfs alias, then we make sure that the
387 * target isn't hidden. If it is, we don't show the link in the
390 if ((node->node_type == Plink) && (node->link_target != NULL) &&
391 (node->link_target->flags & DEVFS_HIDDEN))
/* skip entries before the resume offset */
394 if (node->cookie < saveoff)
397 saveoff = node->cookie;
399 error2 = vop_write_dirent(&error, ap->a_uio, node->d_dir.d_ino,
401 node->d_dir.d_namlen,
410 cookies[cookie_index] = node->cookie;
412 if (cookie_index == ncookies)
417 lockmgr(&devfs_lock, LK_RELEASE);
420 ap->a_uio->uio_offset = saveoff;
/* on error with nothing emitted, free the cookie buffer */
421 if (error && cookie_index == 0) {
423 kfree(cookies, M_TEMP);
425 *ap->a_cookies = NULL;
429 *ap->a_ncookies = cookie_index;
430 *ap->a_cookies = cookies;
/*
 * VOP_NRESOLVE: resolve a name within a devfs directory.  Linearly scans
 * the children for a name match under the devfs lock, follows link
 * chains (Plink) with a depth cap, and binds the resulting vnode (or a
 * negative entry) into the namecache.
 */
438 devfs_nresolve(struct vop_nresolve_args *ap)
440 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp);
441 struct devfs_node *node, *found = NULL;
442 struct namecache *ncp;
443 struct vnode *vp = NULL;
448 ncp = ap->a_nch->ncp;
451 if (!devfs_node_is_accessible(dnode))
454 lockmgr(&devfs_lock, LK_EXCLUSIVE);
/* only root and directory nodes can contain entries */
456 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir)) {
458 cache_setvp(ap->a_nch, NULL);
/* length check first, then byte compare, to find the child */
462 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) {
463 if (len == node->d_dir.d_namlen) {
464 if (!memcmp(ncp->nc_name, node->d_dir.d_name, len)) {
/* chase link targets; depth-limited to avoid recursive links */
473 while ((found->node_type == Plink) && (found->link_target)) {
475 devfs_debug(DEVFS_DEBUG_SHOW, "Recursive link or depth >= 8");
479 found = found->link_target;
483 if (!(found->flags & DEVFS_HIDDEN))
484 devfs_allocv(/*ap->a_dvp->v_mount, */ &vp, found);
/* not found (or hidden): install a negative namecache entry */
489 cache_setvp(ap->a_nch, NULL);
495 cache_setvp(ap->a_nch, vp);
498 lockmgr(&devfs_lock, LK_RELEASE);
/*
 * VOP_NLOOKUPDOTDOT: return a vnode for the parent directory, or ENOENT
 * when the node has no parent (e.g. the devfs root).
 */
505 devfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
507 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp);
510 if (!devfs_node_is_accessible(dnode))
513 lockmgr(&devfs_lock, LK_EXCLUSIVE);
514 if (dnode->parent != NULL) {
515 devfs_allocv(ap->a_vpp, dnode->parent);
516 vn_unlock(*ap->a_vpp);
518 lockmgr(&devfs_lock, LK_RELEASE);
520 return ((*ap->a_vpp == NULL) ? ENOENT : 0);
/*
 * VOP_GETATTR: fill in a vattr from the devfs_node.  Attributes are
 * synced from the device first (SI_OVERRIDE case).  For symlinks va_size
 * is the link length; for disk devices va_size is taken from DIOCGPART
 * so lseek() against the raw device behaves.
 */
525 devfs_getattr(struct vop_getattr_args *ap)
527 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
528 struct vattr *vap = ap->a_vap;
529 struct partinfo pinfo;
533 if (!devfs_node_is_accessible(node))
536 node_sync_dev_get(node);
538 lockmgr(&devfs_lock, LK_EXCLUSIVE);
540 /* start by zeroing out the attributes */
543 /* next do all the common fields */
544 vap->va_type = ap->a_vp->v_type;
545 vap->va_mode = node->mode;
546 vap->va_fileid = DEVFS_NODE(ap->a_vp)->d_dir.d_ino ;
547 vap->va_flags = 0; /* XXX: what should this be? */
548 vap->va_blocksize = DEV_BSIZE;
/* default size; overridden below for symlinks and disks */
549 vap->va_bytes = vap->va_size = sizeof(struct devfs_node);
551 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
553 vap->va_atime = node->atime;
554 vap->va_mtime = node->mtime;
555 vap->va_ctime = node->ctime;
557 vap->va_nlink = 1; /* number of references to file */
559 vap->va_uid = node->uid;
560 vap->va_gid = node->gid;
/* device nodes additionally report the raw minor number */
565 if ((node->node_type == Pdev) && node->d_dev) {
566 reference_dev(node->d_dev);
567 vap->va_rminor = node->d_dev->si_uminor;
568 release_dev(node->d_dev);
571 /* For a softlink the va_size is the length of the softlink */
572 if (node->symlink_name != 0) {
573 vap->va_bytes = vap->va_size = node->symlink_namelen;
577 * For a disk-type device, va_size is the size of the underlying
578 * device, so that lseek() works properly.
580 if ((node->d_dev) && (dev_dflags(node->d_dev) & D_DISK)) {
581 bzero(&pinfo, sizeof(pinfo));
582 error = dev_dioctl(node->d_dev, DIOCGPART, (void *)&pinfo,
583 0, proc0.p_ucred, NULL);
584 if ((error == 0) && (pinfo.media_blksize != 0)) {
585 vap->va_size = pinfo.media_size;
592 lockmgr(&devfs_lock, LK_RELEASE);
/*
 * VOP_SETATTR: apply uid/gid/mode changes to the node.  Non-owner and
 * non-group-member callers need PRIV_VFS_CHOWN (for uid/gid) or
 * PRIV_VFS_ADMIN (for mode).  Changes are pushed back to the device via
 * node_sync_dev_set() and ctime is updated.
 */
599 devfs_setattr(struct vop_setattr_args *ap)
601 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
605 if (!devfs_node_is_accessible(node))
607 node_sync_dev_get(node);
609 lockmgr(&devfs_lock, LK_EXCLUSIVE);
613 if (vap->va_uid != (uid_t)VNOVAL) {
614 if ((ap->a_cred->cr_uid != node->uid) &&
615 (!groupmember(node->gid, ap->a_cred))) {
616 error = priv_check(curthread, PRIV_VFS_CHOWN);
620 node->uid = vap->va_uid;
/* NOTE(review): gid is compared against (uid_t)VNOVAL here rather than
 * (gid_t)VNOVAL -- likely benign but worth confirming. */
623 if (vap->va_gid != (uid_t)VNOVAL) {
624 if ((ap->a_cred->cr_uid != node->uid) &&
625 (!groupmember(node->gid, ap->a_cred))) {
626 error = priv_check(curthread, PRIV_VFS_CHOWN);
630 node->gid = vap->va_gid;
633 if (vap->va_mode != (mode_t)VNOVAL) {
634 if (ap->a_cred->cr_uid != node->uid) {
635 error = priv_check(curthread, PRIV_VFS_ADMIN);
639 node->mode = vap->va_mode;
643 node_sync_dev_set(node);
644 nanotime(&node->ctime);
645 lockmgr(&devfs_lock, LK_RELEASE);
/*
 * VOP_READLINK: copy the symlink target into the caller's uio, under the
 * devfs lock.
 */
652 devfs_readlink(struct vop_readlink_args *ap)
654 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
657 if (!devfs_node_is_accessible(node))
660 lockmgr(&devfs_lock, LK_EXCLUSIVE);
661 ret = uiomove(node->symlink_name, node->symlink_namelen, ap->a_uio);
662 lockmgr(&devfs_lock, LK_RELEASE);
/* VOP_PRINT: debugging hook; body not visible in this chunk. */
669 devfs_print(struct vop_print_args *ap)
/*
 * VOP_NSYMLINK: create a user symlink in a devfs directory.  Allocates a
 * Plink node, stores a NUL-terminated copy of the target path, flags the
 * node DEVFS_USER_CREATED (so devfs_nremove may delete it later), and
 * binds the new vnode into the namecache.
 */
676 devfs_nsymlink(struct vop_nsymlink_args *ap)
678 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp);
679 struct devfs_node *node;
682 if (!devfs_node_is_accessible(dnode))
685 ap->a_vap->va_type = VLNK;
/* symlinks may only be created inside root/directory nodes */
687 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir))
690 lockmgr(&devfs_lock, LK_EXCLUSIVE);
691 devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Plink,
692 ap->a_nch->ncp->nc_name, dnode, NULL);
694 targetlen = strlen(ap->a_target);
696 node = DEVFS_NODE(*ap->a_vpp);
697 node->flags |= DEVFS_USER_CREATED;
698 node->symlink_namelen = targetlen;
699 node->symlink_name = kmalloc(targetlen + 1, M_DEVFS, M_WAITOK);
700 memcpy(node->symlink_name, ap->a_target, targetlen);
701 node->symlink_name[targetlen] = '\0';
702 cache_setunresolved(ap->a_nch);
703 cache_setvp(ap->a_nch, *ap->a_vpp);
705 lockmgr(&devfs_lock, LK_RELEASE);
707 return ((*ap->a_vpp == NULL) ? ENOTDIR : 0);
/*
 * VOP_NREMOVE: remove a user-created entry (e.g. a symlink made via
 * devfs_nsymlink).  Only nodes flagged DEVFS_USER_CREATED may be
 * removed; device nodes are owned by the kernel.
 */
712 devfs_nremove(struct vop_nremove_args *ap)
714 struct devfs_node *dnode = DEVFS_NODE(ap->a_dvp);
715 struct devfs_node *node;
716 struct namecache *ncp;
719 ncp = ap->a_nch->ncp;
721 if (!devfs_node_is_accessible(dnode))
724 lockmgr(&devfs_lock, LK_EXCLUSIVE);
726 if ((dnode->node_type != Proot) && (dnode->node_type != Pdir))
/* locate the child by name */
729 TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(dnode), link) {
730 if (ncp->nc_nlen != node->d_dir.d_namlen)
732 if (memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen))
736 * only allow removal of user created stuff (e.g. symlinks)
738 if ((node->flags & DEVFS_USER_CREATED) == 0) {
743 cache_inval_vp(node->v_node, CINV_DESTROY);
750 cache_setunresolved(ap->a_nch);
751 cache_setvp(ap->a_nch, NULL);
754 lockmgr(&devfs_lock, LK_RELEASE);
/*
 * VOP_OPEN for device nodes.  Handles per-open device cloning (devfs_clone),
 * calls the driver's d_open, enforces securelevel restrictions on opening
 * disks for writing, applies tty setup, initializes VM I/O for disks, and
 * finally points the struct file at devfs_dev_fileops.
 */
760 devfs_spec_open(struct vop_open_args *ap)
762 struct vnode *vp = ap->a_vp;
763 struct vnode *orig_vp = NULL;
764 struct devfs_node *node = DEVFS_NODE(vp);
765 struct devfs_node *newnode;
766 cdev_t dev, ndev = NULL;
770 if (node->d_dev == NULL)
772 if (!devfs_node_is_accessible(node))
776 if ((dev = vp->v_rdev) == NULL)
/* cloning path: ask the device for a per-open clone and build a node for it */
779 if (node && ap->a_fp) {
780 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n");
781 lockmgr(&devfs_lock, LK_EXCLUSIVE);
783 ndev = devfs_clone(dev, node->d_dir.d_name, node->d_dir.d_namlen,
784 ap->a_mode, ap->a_cred);
786 newnode = devfs_create_device_node(
787 DEVFS_MNTDATA(vp->v_mount)->root_node,
789 /* XXX: possibly destroy device if this happens */
791 if (newnode != NULL) {
795 devfs_debug(DEVFS_DEBUG_DEBUG,
796 "parent here is: %s, node is: |%s|\n",
797 ((node->parent->node_type == Proot) ?
798 "ROOT!" : node->parent->d_dir.d_name),
799 newnode->d_dir.d_name);
800 devfs_debug(DEVFS_DEBUG_DEBUG,
802 ((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(node->parent), devfs_node_head)))->d_dir.d_name);
805 * orig_vp is set to the original vp if we cloned.
807 /* node->flags |= DEVFS_CLONED; */
808 devfs_allocv(&vp, newnode);
813 lockmgr(&devfs_lock, LK_RELEASE);
816 devfs_debug(DEVFS_DEBUG_DEBUG,
817 "devfs_spec_open() called on %s! \n",
821 * Make this field valid before any I/O in ->d_open
823 if (!dev->si_iosize_max)
824 dev->si_iosize_max = DFLTPHYS;
826 if (dev_dflags(dev) & D_TTY)
827 vp->v_flag |= VISTTY;
/* call the driver's open entry point */
830 error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred);
831 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
834 * Clean up any cloned vp if we error out.
840 /* orig_vp = NULL; */
846 * This checks if the disk device is going to be opened for writing.
847 * It will be only allowed in the cases where securelevel permits it
848 * and it's not mounted R/W.
850 if ((dev_dflags(dev) & D_DISK) && (ap->a_mode & FWRITE) &&
851 (ap->a_cred != FSCRED)) {
853 /* Very secure mode. No open for writing allowed */
854 if (securelevel >= 2)
858 * If it is mounted R/W, do not allow to open for writing.
859 * In the case it's mounted read-only but securelevel
860 * is >= 1, then do not allow opening for writing either.
862 if (vfs_mountedon(vp)) {
863 if (!(dev->si_mountpoint->mnt_flag & MNT_RDONLY))
865 else if (securelevel >= 1)
/* tty devices need a valid t_stop; install nottystop if missing */
870 if (dev_dflags(dev) & D_TTY) {
875 devfs_debug(DEVFS_DEBUG_DEBUG,
876 "devfs: no t_stop\n");
877 tp->t_stop = nottystop;
/* disks get VM object-backed I/O */
883 if (vn_isdisk(vp, NULL)) {
884 if (!dev->si_bsize_phys)
885 dev->si_bsize_phys = DEV_BSIZE;
886 vinitvmio(vp, IDX_TO_OFF(INT_MAX));
892 nanotime(&node->atime);
898 /* Ugly pty magic, to make pty devices appear once they are opened */
899 if (node && (node->flags & DEVFS_PTY) == DEVFS_PTY)
900 node->flags &= ~DEVFS_INVISIBLE;
/* route further file operations through devfs_dev_fileops */
903 ap->a_fp->f_type = DTYPE_VNODE;
904 ap->a_fp->f_flag = ap->a_mode & FMASK;
905 ap->a_fp->f_ops = &devfs_dev_fileops;
906 ap->a_fp->f_data = vp;
/*
 * VOP_CLOSE for device nodes.  Detects the last close on a controlling
 * terminal, calls the driver's d_close when appropriate (forced close,
 * D_TRACKCLOSE, or last open), re-hides pty nodes, and maintains the
 * vnode open count.
 */
914 devfs_spec_close(struct vop_close_args *ap)
916 struct devfs_node *node = DEVFS_NODE(ap->a_vp);
917 struct proc *p = curproc;
918 struct vnode *vp = ap->a_vp;
919 cdev_t dev = vp->v_rdev;
923 devfs_debug(DEVFS_DEBUG_DEBUG,
924 "devfs_spec_close() called on %s! \n",
928 * A couple of hacks for devices and tty devices. The
929 * vnode ref count cannot be used to figure out the
930 * last close, but we can use v_opencount now that
931 * revoke works properly.
933 * Detect the last close on a controlling terminal and clear
934 * the session (half-close).
939 if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) {
940 p->p_session->s_ttyvp = NULL;
945 * Vnodes can be opened and closed multiple times. Do not really
946 * close the device unless (1) it is being closed forcibly,
947 * (2) the device wants to track closes, or (3) this is the last
948 * vnode doing its last close on the device.
950 * XXX the VXLOCK (force close) case can leave vnodes referencing
951 * a closed device. This might not occur now that our revoke is
954 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n");
955 if (dev && ((vp->v_flag & VRECLAIMED) ||
956 (dev_dflags(dev) & D_TRACKCLOSE) ||
957 (vp->v_opencount == 1))) {
959 * Unlock around dev_dclose()
962 if (vn_islocked(vp)) {
966 error = dev_dclose(dev, ap->a_fflag, S_IFCHR);
969 * Ugly pty magic, to make pty devices disappear again once
972 if (node && (node->flags & DEVFS_PTY) == DEVFS_PTY)
973 node->flags |= DEVFS_INVISIBLE;
976 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
980 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n");
983 * Track the actual opens and closes on the vnode. The last close
984 * disassociates the rdev. If the rdev is already disassociated or
985 * the opencount is already 0, the vnode might have been revoked
986 * and no further opencount tracking occurs.
990 if (vp->v_opencount > 0)
/*
 * fileops close: neutralize the file's ops (badfileops) and close the
 * underlying vnode with the file's open flags.
 */
998 devfs_specf_close(struct file *fp)
1000 struct vnode *vp = (struct vnode *)fp->f_data;
1004 fp->f_ops = &badfileops;
1005 error = vn_close(vp, fp->f_flag);
1013 * Device-optimized file table vnode read routine.
1015 * This bypasses the VOP table and talks directly to the device. Most
1016 * filesystems just route to specfs and can make this optimization.
1018 * MPALMOSTSAFE - acquires mplock
1021 devfs_specf_read(struct file *fp, struct uio *uio,
1022 struct ucred *cred, int flags)
1024 struct devfs_node *node;
1031 KASSERT(uio->uio_td == curthread,
1032 ("uio_td %p is not td %p", uio->uio_td, curthread));
1034 vp = (struct vnode *)fp->f_data;
/* a revoked/bad vnode means the device is gone */
1035 if (vp == NULL || vp->v_type == VBAD) {
1039 node = DEVFS_NODE(vp);
1041 if ((dev = vp->v_rdev) == NULL) {
1048 if (uio->uio_resid == 0) {
/* O_FOFFSET means the caller supplied an explicit offset; otherwise use fp */
1052 if ((flags & O_FOFFSET) == 0)
1053 uio->uio_offset = fp->f_offset;
/* translate per-call and per-file flags into ioflags for the driver */
1056 if (flags & O_FBLOCKING) {
1057 /* ioflag &= ~IO_NDELAY; */
1058 } else if (flags & O_FNONBLOCKING) {
1059 ioflag |= IO_NDELAY;
1060 } else if (fp->f_flag & FNONBLOCK) {
1061 ioflag |= IO_NDELAY;
1063 if (flags & O_FBUFFERED) {
1064 /* ioflag &= ~IO_DIRECT; */
1065 } else if (flags & O_FUNBUFFERED) {
1066 ioflag |= IO_DIRECT;
1067 } else if (fp->f_flag & O_DIRECT) {
1068 ioflag |= IO_DIRECT;
1070 ioflag |= sequential_heuristic(uio, fp);
1072 error = dev_dread(dev, uio, ioflag);
1076 nanotime(&node->atime);
1077 if ((flags & O_FOFFSET) == 0)
1078 fp->f_offset = uio->uio_offset;
1079 fp->f_nextoff = uio->uio_offset;
/*
 * Device-optimized file table write routine; mirrors devfs_specf_read
 * but additionally handles append mode, sync-write flags, and updates
 * both atime and mtime on success.
 */
1087 devfs_specf_write(struct file *fp, struct uio *uio,
1088 struct ucred *cred, int flags)
1090 struct devfs_node *node;
1097 KASSERT(uio->uio_td == curthread,
1098 ("uio_td %p is not p %p", uio->uio_td, curthread));
1100 vp = (struct vnode *)fp->f_data;
1101 if (vp == NULL || vp->v_type == VBAD) {
1105 node = DEVFS_NODE(vp);
/* throttle dirty-buffer buildup before writing to a regular vnode */
1106 if (vp->v_type == VREG)
1107 bwillwrite(uio->uio_resid);
1108 vp = (struct vnode *)fp->f_data;
1110 if ((dev = vp->v_rdev) == NULL) {
1116 if ((flags & O_FOFFSET) == 0)
1117 uio->uio_offset = fp->f_offset;
1120 if (vp->v_type == VREG &&
1121 ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
1122 ioflag |= IO_APPEND;
/* blocking / buffering / sync flags, same scheme as the read path */
1125 if (flags & O_FBLOCKING) {
1126 /* ioflag &= ~IO_NDELAY; */
1127 } else if (flags & O_FNONBLOCKING) {
1128 ioflag |= IO_NDELAY;
1129 } else if (fp->f_flag & FNONBLOCK) {
1130 ioflag |= IO_NDELAY;
1132 if (flags & O_FBUFFERED) {
1133 /* ioflag &= ~IO_DIRECT; */
1134 } else if (flags & O_FUNBUFFERED) {
1135 ioflag |= IO_DIRECT;
1136 } else if (fp->f_flag & O_DIRECT) {
1137 ioflag |= IO_DIRECT;
1139 if (flags & O_FASYNCWRITE) {
1140 /* ioflag &= ~IO_SYNC; */
1141 } else if (flags & O_FSYNCWRITE) {
1143 } else if (fp->f_flag & O_FSYNC) {
1147 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
1149 ioflag |= sequential_heuristic(uio, fp);
1151 error = dev_dwrite(dev, uio, ioflag);
1155 nanotime(&node->atime);
1156 nanotime(&node->mtime);
1159 if ((flags & O_FOFFSET) == 0)
1160 fp->f_offset = uio->uio_offset;
1161 fp->f_nextoff = uio->uio_offset;
/*
 * fileops stat: populate a struct stat from the vnode attributes, then
 * override the timestamps with the device's own last-read/last-write
 * times for character/block devices (device I/O bypasses the filesystem,
 * so the vattr times can be stale).
 */
1169 devfs_specf_stat(struct file *fp, struct stat *sb, struct ucred *cred)
1175 vp = (struct vnode *)fp->f_data;
1176 error = vn_stat(vp, sb, cred);
1188 error = VOP_GETATTR(vp, vap);
1195 * Zero the spare stat fields
1201 * Copy from vattr table ... or not in case it's a cloned device
1203 if (vap->va_fsid != VNOVAL)
1204 sb->st_dev = vap->va_fsid;
1206 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
1208 sb->st_ino = vap->va_fileid;
1210 mode = vap->va_mode;
/* clamp link count to what nlink_t can represent */
1214 if (vap->va_nlink > (nlink_t)-1)
1215 sb->st_nlink = (nlink_t)-1;
1217 sb->st_nlink = vap->va_nlink;
1218 sb->st_uid = vap->va_uid;
1219 sb->st_gid = vap->va_gid;
1220 sb->st_rdev = dev2udev(DEVFS_NODE(vp)->d_dev);
1221 sb->st_size = vap->va_bytes;
1222 sb->st_atimespec = vap->va_atime;
1223 sb->st_mtimespec = vap->va_mtime;
1224 sb->st_ctimespec = vap->va_ctime;
1227 * A VCHR and VBLK device may track the last access and last modified
1228 * time independantly of the filesystem. This is particularly true
1229 * because device read and write calls may bypass the filesystem.
1231 if (vp->v_type == VCHR || vp->v_type == VBLK) {
1234 if (dev->si_lastread) {
1235 sb->st_atimespec.tv_sec = dev->si_lastread;
1236 sb->st_atimespec.tv_nsec = 0;
/*
 * BUGFIX: si_lastwrite was being copied into st_atimespec (copy-paste
 * from the si_lastread branch above), clobbering the access time and
 * never updating the modification time.  The last write time belongs
 * in st_mtimespec.
 */
1238 if (dev->si_lastwrite) {
1239 sb->st_mtimespec.tv_sec = dev->si_lastwrite;
1240 sb->st_mtimespec.tv_nsec = 0;
1246 * According to www.opengroup.org, the meaning of st_blksize is
1247 * "a filesystem-specific preferred I/O block size for this
1248 * object. In some filesystem types, this may vary from file
1250 * Default to PAGE_SIZE after much discussion.
1253 sb->st_blksize = PAGE_SIZE;
1255 sb->st_flags = vap->va_flags;
/* st_gen is only exposed to sufficiently privileged callers */
1257 error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0);
1261 sb->st_gen = (u_int32_t)vap->va_gen;
1263 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
1264 sb->st_fsmid = vap->va_fsmid;
/*
 * fileops kqfilter: validate the vnode/device and forward the knote to
 * the driver via dev_dkqfilter().
 */
1272 devfs_specf_kqfilter(struct file *fp, struct knote *kn)
1280 vp = (struct vnode *)fp->f_data;
1281 if (vp == NULL || vp->v_type == VBAD) {
1285 if ((dev = vp->v_rdev) == NULL) {
1291 error = dev_dkqfilter(dev, kn);
/*
 * fileops poll: validate the vnode/device, forward to dev_dpoll(), and
 * update the node's atime.
 */
1302 devfs_specf_poll(struct file *fp, int events, struct ucred *cred)
1304 struct devfs_node *node;
1311 vp = (struct vnode *)fp->f_data;
1312 if (vp == NULL || vp->v_type == VBAD) {
1316 node = DEVFS_NODE(vp);
1318 if ((dev = vp->v_rdev) == NULL) {
1323 error = dev_dpoll(dev, events);
1329 nanotime(&node->atime);
1338 * MPALMOSTSAFE - acquires mplock
/*
 * fileops ioctl: handle FIODTYPE/FIODNAME locally, forward everything
 * else to the driver, and perform controlling-tty bookkeeping for
 * TIOCSCTTY.
 */
1341 devfs_specf_ioctl(struct file *fp, u_long com, caddr_t data,
1342 struct ucred *ucred, struct sysmsg *msg)
1344 struct devfs_node *node;
1349 struct fiodname_args *name_args;
1354 vp = ((struct vnode *)fp->f_data);
1355 if ((dev = vp->v_rdev) == NULL) {
1356 error = EBADF; /* device was revoked */
1360 node = DEVFS_NODE(vp);
1362 devfs_debug(DEVFS_DEBUG_DEBUG,
1363 "devfs_specf_ioctl() called! for dev %s\n",
/* FIODTYPE: report the device's type bits directly */
1366 if (com == FIODTYPE) {
1367 *(int *)data = dev_dflags(dev) & D_TYPEMASK;
/* FIODNAME: copy the device name out to the caller's buffer */
1370 } else if (com == FIODNAME) {
1371 name_args = (struct fiodname_args *)data;
1372 name = dev->si_name;
1373 namlen = strlen(name) + 1;
1375 devfs_debug(DEVFS_DEBUG_DEBUG,
1376 "ioctl, got: FIODNAME for %s\n", name);
/* only copy if the caller's buffer is large enough (incl. NUL) */
1378 if (namlen <= name_args->len)
1379 error = copyout(dev->si_name, name_args->name, namlen);
1383 devfs_debug(DEVFS_DEBUG_DEBUG,
1384 "ioctl stuff: error: %d\n", error);
/* all other ioctls go to the driver */
1388 error = dev_dioctl(dev, com, data, fp->f_flag, ucred, msg);
1392 nanotime(&node->atime);
1393 nanotime(&node->mtime);
1397 if (com == TIOCSCTTY) {
1398 devfs_debug(DEVFS_DEBUG_DEBUG,
1399 "devfs_specf_ioctl: got TIOCSCTTY on %s\n",
/* on successful TIOCSCTTY, adopt this vnode as the session's ctty */
1402 if (error == 0 && com == TIOCSCTTY) {
1403 struct proc *p = curthread->td_proc;
1404 struct session *sess;
1406 devfs_debug(DEVFS_DEBUG_DEBUG,
1407 "devfs_specf_ioctl: dealing with TIOCSCTTY on %s\n",
1413 sess = p->p_session;
1416 * Do nothing if reassigning same control tty
1418 if (sess->s_ttyvp == vp) {
1424 * Get rid of reference to old control tty
1426 ovp = sess->s_ttyvp;
1435 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() finished! \n");
/*
 * VOP_FSYNC for devices: only meaningful for disks; flushes dirty
 * buffers associated with the block device via vfsync().
 */
1441 devfs_spec_fsync(struct vop_fsync_args *ap)
1443 struct vnode *vp = ap->a_vp;
1446 if (!vn_isdisk(vp, NULL))
1450 * Flush all dirty buffers associated with a block device.
1452 error = vfsync(vp, ap->a_waitfor, 10000, NULL, NULL);
/*
 * VOP_READ for devices: forward the uio to the driver's d_read and
 * update atime.  Used when the device is accessed through the VOP layer
 * rather than devfs_dev_fileops.
 */
1457 devfs_spec_read(struct vop_read_args *ap)
1459 struct devfs_node *node;
1468 node = DEVFS_NODE(vp);
1470 if (dev == NULL) /* device was revoked */
1472 if (uio->uio_resid == 0)
1476 error = dev_dread(dev, uio, ap->a_ioflag);
1477 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1480 nanotime(&node->atime);
1486 * Vnode op for write
1488 * spec_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1489 * struct ucred *a_cred)
/*
 * VOP_WRITE for devices: forward to the driver's d_write and update
 * atime/mtime on the node.
 */
1492 devfs_spec_write(struct vop_write_args *ap)
1494 struct devfs_node *node;
1503 node = DEVFS_NODE(vp);
1505 KKASSERT(uio->uio_segflg != UIO_NOCOPY);
1507 if (dev == NULL) /* device was revoked */
1511 error = dev_dwrite(dev, uio, ap->a_ioflag);
1512 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1515 nanotime(&node->atime);
1516 nanotime(&node->mtime);
1523 * Device ioctl operation.
1525 * spec_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data,
1526 * int a_fflag, struct ucred *a_cred, struct sysmsg *msg)
/* VOP_IOCTL for devices: touch timestamps and forward to dev_dioctl(). */
1529 devfs_spec_ioctl(struct vop_ioctl_args *ap)
1531 struct vnode *vp = ap->a_vp;
1532 struct devfs_node *node;
1535 if ((dev = vp->v_rdev) == NULL)
1536 return (EBADF); /* device was revoked */
1537 node = DEVFS_NODE(vp);
1541 nanotime(&node->atime);
1542 nanotime(&node->mtime);
1546 return (dev_dioctl(dev, ap->a_command, ap->a_data, ap->a_fflag,
1547 ap->a_cred, ap->a_sysmsg));
1551 * spec_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred)
/* VOP_POLL for devices: touch atime and forward to dev_dpoll(). */
1555 devfs_spec_poll(struct vop_poll_args *ap)
1557 struct vnode *vp = ap->a_vp;
1558 struct devfs_node *node;
1561 if ((dev = vp->v_rdev) == NULL)
1562 return (EBADF); /* device was revoked */
1563 node = DEVFS_NODE(vp);
1567 nanotime(&node->atime);
1570 return (dev_dpoll(dev, ap->a_events));
1574 * spec_kqfilter(struct vnode *a_vp, struct knote *a_kn)
/* VOP_KQFILTER for devices: touch atime and forward to dev_dkqfilter(). */
1578 devfs_spec_kqfilter(struct vop_kqfilter_args *ap)
1580 struct vnode *vp = ap->a_vp;
1581 struct devfs_node *node;
1584 if ((dev = vp->v_rdev) == NULL)
1585 return (EBADF); /* device was revoked */
1586 node = DEVFS_NODE(vp);
1590 nanotime(&node->atime);
1593 return (dev_dkqfilter(dev, ap->a_kn));
1597 * Convert a vnode strategy call into a device strategy call. Vnode strategy
1598 * calls are not limited to device DMA limits so we have to deal with the
1601 * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
/*
 * VOP_STRATEGY for devices.  I/O that fits within the device's
 * si_iosize_max (or is neither a read nor a write) is passed straight
 * through; larger transfers are split into a chain of chunk-sized
 * child buffers completed by devfs_spec_strategy_done().
 */
1604 devfs_spec_strategy(struct vop_strategy_args *ap)
1606 struct bio *bio = ap->a_bio;
1607 struct buf *bp = bio->bio_buf;
1614 if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
1618 * Collect statistics on synchronous and asynchronous read
1619 * and write counts for disks that have associated filesystems.
1622 KKASSERT(vp->v_rdev != NULL); /* XXX */
1623 if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
1624 if (bp->b_cmd == BUF_CMD_READ) {
1625 if (bp->b_flags & BIO_SYNC)
1626 mp->mnt_stat.f_syncreads++;
1628 mp->mnt_stat.f_asyncreads++;
1630 if (bp->b_flags & BIO_SYNC)
1631 mp->mnt_stat.f_syncwrites++;
1633 mp->mnt_stat.f_asyncwrites++;
1638 * Device iosize limitations only apply to read and write. Shortcut
1639 * the I/O if it fits.
1641 if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) {
1642 devfs_debug(DEVFS_DEBUG_DEBUG,
1643 "%s: si_iosize_max not set!\n",
1644 dev_dname(vp->v_rdev));
1645 maxiosize = MAXPHYS;
1647 #if SPEC_CHAIN_DEBUG & 2
/* small-enough (or non-read/write) I/O: hand straight to the device */
1650 if (bp->b_bcount <= maxiosize ||
1651 (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
1652 dev_dstrategy_chain(vp->v_rdev, bio);
1657 * Clone the buffer and set up an I/O chain to chunk up the I/O.
1659 nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
1663 BUF_LOCK(nbp, LK_EXCLUSIVE);
1666 nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
1667 nbp->b_data = bp->b_data;
/* completion routine chains the next chunk; caller_info1 links back to
 * the original bio */
1668 nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1669 nbp->b_bio1.bio_offset = bio->bio_offset;
1670 nbp->b_bio1.bio_caller_info1.ptr = bio;
1673 * Start the first transfer
1675 if (vn_isdisk(vp, NULL))
1676 chunksize = vp->v_rdev->si_bsize_phys;
1678 chunksize = DEV_BSIZE;
/* round the chunk down to a multiple of the physical block size */
1679 chunksize = maxiosize / chunksize * chunksize;
1680 #if SPEC_CHAIN_DEBUG & 1
1681 devfs_debug(DEVFS_DEBUG_DEBUG,
1682 "spec_strategy chained I/O chunksize=%d\n",
1685 nbp->b_cmd = bp->b_cmd;
1686 nbp->b_bcount = chunksize;
1687 nbp->b_bufsize = chunksize; /* used to detect a short I/O */
1688 nbp->b_bio1.bio_caller_info2.index = chunksize;
1690 #if SPEC_CHAIN_DEBUG & 1
1691 devfs_debug(DEVFS_DEBUG_DEBUG,
1692 "spec_strategy: chain %p offset %d/%d bcount %d\n",
1693 bp, 0, bp->b_bcount, nbp->b_bcount);
1696 dev_dstrategy(vp->v_rdev, &nbp->b_bio1);
1698 if (DEVFS_NODE(vp)) {
1699 nanotime(&DEVFS_NODE(vp)->atime);
1700 nanotime(&DEVFS_NODE(vp)->mtime);
/*
 * NOTE(review): incomplete extraction of devfs_spec_strategy_done()
 * -- fused-in line numbers and missing structural lines (#endif,
 * braces, biodone() calls implied by the gaps).  Code bytes left
 * untouched; comments only.  Recover from upstream before building.
 */
1707 * Chunked up transfer completion routine - chain transfers until done
1711 devfs_spec_strategy_done(struct bio *nbio)
1713 struct buf *nbp = nbio->bio_buf;
1714 struct bio *bio = nbio->bio_caller_info1.ptr; /* original bio */
1715 struct buf *bp = bio->bio_buf; /* original bp */
1716 int chunksize = nbio->bio_caller_info2.index; /* chunking */
/* Progress so far: both buffers share the same data base, so the
 * pointer difference is the byte offset of the current chunk. */
1717 int boffset = nbp->b_data - bp->b_data;
/*
 * Case 1: the chunk failed -- terminate the chain and copy the
 * error and residual count back into the original buffer.
 */
1719 if (nbp->b_flags & B_ERROR) {
1721 * An error terminates the chain, propogate the error back
1722 * to the original bp
1724 bp->b_flags |= B_ERROR;
1725 bp->b_error = nbp->b_error;
1726 bp->b_resid = bp->b_bcount - boffset +
1727 (nbp->b_bcount - nbp->b_resid);
1728 #if SPEC_CHAIN_DEBUG & 1
1729 devfs_debug(DEVFS_DEBUG_DEBUG,
1730 "spec_strategy: chain %p error %d bcount %d/%d\n",
1731 bp, bp->b_error, bp->b_bcount,
1732 bp->b_bcount - bp->b_resid);
1734 kfree(nbp, M_DEVBUF);
/*
 * Case 2: driver reported a residual -- short read/write, chain ends.
 */
1736 } else if (nbp->b_resid) {
1738 * A short read or write terminates the chain
1740 bp->b_error = nbp->b_error;
1741 bp->b_resid = bp->b_bcount - boffset +
1742 (nbp->b_bcount - nbp->b_resid);
1743 #if SPEC_CHAIN_DEBUG & 1
1744 devfs_debug(DEVFS_DEBUG_DEBUG,
1745 "spec_strategy: chain %p short read(1) "
1747 bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
1749 kfree(nbp, M_DEVBUF);
/*
 * Case 3: driver truncated b_bcount below b_bufsize -- the other way
 * a device can signal EOF/short I/O; clamp the original request.
 */
1751 } else if (nbp->b_bcount != nbp->b_bufsize) {
1753 * A short read or write can also occur by truncating b_bcount
1755 #if SPEC_CHAIN_DEBUG & 1
1756 devfs_debug(DEVFS_DEBUG_DEBUG,
1757 "spec_strategy: chain %p short read(2) "
1759 bp, nbp->b_bcount + boffset, bp->b_bcount);
1762 bp->b_bcount = nbp->b_bcount + boffset;
1763 bp->b_resid = nbp->b_resid;
1764 kfree(nbp, M_DEVBUF);
/*
 * Case 4: this chunk reached the end of the original request --
 * the whole transfer completed successfully.
 */
1766 } else if (nbp->b_bcount + boffset == bp->b_bcount) {
1768 * No more data terminates the chain
1770 #if SPEC_CHAIN_DEBUG & 1
1771 devfs_debug(DEVFS_DEBUG_DEBUG,
1772 "spec_strategy: chain %p finished bcount %d\n",
1777 kfree(nbp, M_DEVBUF);
/*
 * Case 5: more data remains -- advance the clone's window by one
 * chunk and reissue it to the driver.
 */
1781 * Continue the chain
1783 boffset += nbp->b_bcount;
1784 nbp->b_data = bp->b_data + boffset;
1785 nbp->b_bcount = bp->b_bcount - boffset;
1786 if (nbp->b_bcount > chunksize)
1787 nbp->b_bcount = chunksize;
/* bio_done must be re-armed for each reissued chunk. */
1788 nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1789 nbp->b_bio1.bio_offset = bio->bio_offset + boffset;
1791 #if SPEC_CHAIN_DEBUG & 1
1792 devfs_debug(DEVFS_DEBUG_DEBUG,
1793 "spec_strategy: chain %p offset %d/%d bcount %d\n",
1794 bp, boffset, bp->b_bcount, nbp->b_bcount);
1797 dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
1802 * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
1805 devfs_spec_freeblks(struct vop_freeblks_args *ap)
1810 * XXX: This assumes that strategy does the deed right away.
1811 * XXX: this may not be TRTTD.
1813 KKASSERT(ap->a_vp->v_rdev != NULL);
1814 if ((dev_dflags(ap->a_vp->v_rdev) & D_CANFREE) == 0)
1816 bp = geteblk(ap->a_length);
1817 bp->b_cmd = BUF_CMD_FREEBLKS;
1818 bp->b_bio1.bio_offset = ap->a_offset;
1819 bp->b_bcount = ap->a_length;
1820 dev_dstrategy(ap->a_vp->v_rdev, &bp->b_bio1);
1825 * Implement degenerate case where the block requested is the block
1826 * returned, and assume that the entire device is contiguous in regards
1827 * to the contiguous block range (runp and runb).
1829 * spec_bmap(struct vnode *a_vp, off_t a_loffset,
1830 * off_t *a_doffsetp, int *a_runp, int *a_runb)
1833 devfs_spec_bmap(struct vop_bmap_args *ap)
1835 if (ap->a_doffsetp != NULL)
1836 *ap->a_doffsetp = ap->a_loffset;
1837 if (ap->a_runp != NULL)
1838 *ap->a_runp = MAXBSIZE;
1839 if (ap->a_runb != NULL) {
1840 if (ap->a_loffset < MAXBSIZE)
1841 *ap->a_runb = (int)ap->a_loffset;
1843 *ap->a_runb = MAXBSIZE;
1850 * Special device advisory byte-level locks.
1852 * spec_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
1853 * struct flock *a_fl, int a_flags)
1857 devfs_spec_advlock(struct vop_advlock_args *ap)
1859 return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP);
1863 devfs_spec_getpages_iodone(struct bio *bio)
1865 bio->bio_buf->b_cmd = BUF_CMD_DONE;
1866 wakeup(bio->bio_buf);
/*
 * NOTE(review): incomplete extraction of devfs_spec_getpages() --
 * fused-in line numbers and missing lines (buffer allocation via the
 * pager buffer pool, crit_enter/crit_exit, loop braces, relpbuf()
 * implied by the gaps).  Code bytes left untouched; comments only.
 * Recover the full function from upstream before building.
 */
1870 * spec_getpages() - get pages associated with device vnode.
1872 * Note that spec_read and spec_write do not use the buffer cache, so we
1873 * must fully implement getpages here.
1876 devfs_spec_getpages(struct vop_getpages_args *ap)
1880 int i, pcount, size;
1883 vm_ooffset_t offset;
1884 int toff, nextoff, nread;
1885 struct vnode *vp = ap->a_vp;
/* Number of VM pages covered by the request. */
1890 pcount = round_page(ap->a_count) / PAGE_SIZE;
1893 * Calculate the offset of the transfer and do sanity check.
1895 offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
1898 * Round up physical size for real devices. We cannot round using
1899 * v_mount's block size data because v_mount has nothing to do with
1900 * the device. i.e. it's usually '/dev'. We need the physical block
1901 * size for the device itself.
1903 * We can't use v_rdev->si_mountpoint because it only exists when the
1904 * block device is mounted. However, we can use v_rdev.
1906 if (vn_isdisk(vp, NULL))
1907 blksiz = vp->v_rdev->si_bsize_phys;
/* Round the transfer size up to a whole number of device blocks.
 * NOTE(review): assumes blksiz is a power of two -- TODO confirm. */
1911 size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
1914 kva = (vm_offset_t)bp->b_data;
1917 * Map the pages to be read into the kva.
1919 pmap_qenter(kva, ap->a_m, pcount);
1921 /* Build a minimal buffer header. */
1922 bp->b_cmd = BUF_CMD_READ;
1923 bp->b_bcount = size;
/* Account this transfer against the global in-flight buffer space. */
1925 bp->b_runningbufspace = size;
1927 runningbufspace += bp->b_runningbufspace;
1931 bp->b_bio1.bio_offset = offset;
1932 bp->b_bio1.bio_done = devfs_spec_getpages_iodone;
/* Per-cpu pager-in statistics. */
1934 mycpu->gd_cnt.v_vnodein++;
1935 mycpu->gd_cnt.v_vnodepgsin += pcount;
1938 vn_strategy(ap->a_vp, &bp->b_bio1);
/* Wait for the iodone callback to flag completion and wake us. */
1942 /* We definitely need to be at splbio here. */
1943 while (bp->b_cmd != BUF_CMD_DONE)
1944 tsleep(bp, 0, "spread", 0);
1948 if (bp->b_flags & B_ERROR) {
1950 error = bp->b_error;
1956 * If EOF is encountered we must zero-extend the result in order
1957 * to ensure that the page does not contain garabge. When no
1958 * error occurs, an early EOF is indicated if b_bcount got truncated.
1959 * b_resid is relative to b_bcount and should be 0, but some devices
1960 * might indicate an EOF with b_resid instead of truncating b_bcount.
1962 nread = bp->b_bcount - bp->b_resid;
/* Zero-fill the tail of the mapping past what was actually read. */
1963 if (nread < ap->a_count)
1964 bzero((caddr_t)kva + nread, ap->a_count - nread);
1965 pmap_qremove(kva, pcount);
/*
 * Walk the pages and mark validity according to how much data the
 * device returned (toff/nextoff track each page's byte range).
 */
1968 for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
1969 nextoff = toff + PAGE_SIZE;
1972 m->flags &= ~PG_ZERO;
1975 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
1976 * pmap modified bit. pmap modified bit should have
1977 * already been cleared.
/* Page fully covered by the read: entirely valid. */
1979 if (nextoff <= nread) {
1980 m->valid = VM_PAGE_BITS_ALL;
/* Page partially covered: mark only the read portion valid. */
1982 } else if (toff < nread) {
1984 * Since this is a VM request, we have to supply the
1985 * unaligned offset to allow vm_page_set_valid()
1986 * to zero sub-DEV_BSIZE'd portions of the page.
1988 vm_page_set_valid(m, 0, nread - toff);
1989 vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
/* Pages other than the one the fault actually requested get
 * activated/deactivated or freed depending on success. */
1995 if (i != ap->a_reqpage) {
1997 * Just in case someone was asking for this page we
1998 * now tell them that it is ok to use.
2000 if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
2002 if (m->flags & PG_WANTED) {
2003 vm_page_activate(m);
2005 vm_page_deactivate(m);
2014 } else if (m->valid) {
2017 * Since this is a VM request, we need to make the
2018 * entire page presentable by zeroing invalid sections.
2020 if (m->valid != VM_PAGE_BITS_ALL)
2021 vm_page_zero_invalid(m, FALSE);
/* Error path: log diagnostics for the requested page and bail. */
2025 m = ap->a_m[ap->a_reqpage];
2026 devfs_debug(DEVFS_DEBUG_WARNING,
2027 "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
2028 devtoname(vp->v_rdev), error, bp, bp->b_vp);
2029 devfs_debug(DEVFS_DEBUG_WARNING,
2030 " size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
2031 size, bp->b_resid, ap->a_count, m->valid);
2032 devfs_debug(DEVFS_DEBUG_WARNING,
2033 " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
2034 nread, ap->a_reqpage, (u_long)m->pindex, pcount);
2036 * Free the buffer header back to the swap buffer pool.
2039 return VM_PAGER_ERROR;
2042 * Free the buffer header back to the swap buffer pool.
/* Success path: update the devfs node mtime, if a node is attached. */
2045 if (DEVFS_NODE(ap->a_vp))
2046 nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
2052 sequential_heuristic(struct uio *uio, struct file *fp)
2055 * Sequential heuristic - detect sequential operation
2057 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
2058 uio->uio_offset == fp->f_nextoff) {
2060 * XXX we assume that the filesystem block size is
2061 * the default. Not true, but still gives us a pretty
2062 * good indicator of how sequential the read operations
2065 int tmpseq = fp->f_seqcount;
2067 tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
2068 if (tmpseq > IO_SEQMAX)
2070 fp->f_seqcount = tmpseq;
2071 return(fp->f_seqcount << IO_SEQSHIFT);
2075 * Not sequential, quick draw-down of seqcount
2077 if (fp->f_seqcount > 1)