sys/vfs/devfs/devfs_vnops.c

   1 /*
   2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Alex Hornung <ahornung@gmail.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 #include <sys/param.h>
  35 #include <sys/systm.h>
  36 #include <sys/time.h>
  37 #include <sys/kernel.h>
  38 #include <sys/lock.h>
  39 #include <sys/fcntl.h>
  40 #include <sys/proc.h>
  41 #include <sys/priv.h>
  42 #include <sys/signalvar.h>
  43 #include <sys/vnode.h>
  44 #include <sys/uio.h>
  45 #include <sys/mount.h>
  46 #include <sys/file.h>
  47 #include <sys/fcntl.h>
  48 #include <sys/namei.h>
  49 #include <sys/dirent.h>
  50 #include <sys/malloc.h>
  51 #include <sys/stat.h>
  52 #include <sys/reg.h>
  53 #include <sys/buf2.h>
  54 #include <vm/vm_pager.h>
  55 #include <vm/vm_zone.h>
  56 #include <vm/vm_object.h>
  57 #include <sys/filio.h>
  58 #include <sys/ttycom.h>
  59 #include <sys/sysref2.h>
  60 #include <sys/tty.h>
  61 #include <vfs/devfs/devfs.h>
  62 #include <sys/pioctl.h>
  63
  64 #include <machine/limits.h>
  65
  66 MALLOC_DECLARE(M_DEVFS);
  67 #define DEVFS_BADOP     (void *)devfs_badop
  68
  69 static int devfs_badop(struct vop_generic_args *);
  70 static int devfs_access(struct vop_access_args *);
  71 static int devfs_inactive(struct vop_inactive_args *);
  72 static int devfs_reclaim(struct vop_reclaim_args *);
  73 static int devfs_readdir(struct vop_readdir_args *);
  74 static int devfs_getattr(struct vop_getattr_args *);
  75 static int devfs_setattr(struct vop_setattr_args *);
  76 static int devfs_readlink(struct vop_readlink_args *);
  77 static int devfs_print(struct vop_print_args *);
  78
  79 static int devfs_nresolve(struct vop_nresolve_args *);
  80 static int devfs_nlookupdotdot(struct vop_nlookupdotdot_args *);
  81 static int devfs_nsymlink(struct vop_nsymlink_args *);
  82 static int devfs_nremove(struct vop_nremove_args *);
  83
  84 static int devfs_spec_open(struct vop_open_args *);
  85 static int devfs_spec_close(struct vop_close_args *);
  86 static int devfs_spec_fsync(struct vop_fsync_args *);
  87
  88 static int devfs_spec_read(struct vop_read_args *);
  89 static int devfs_spec_write(struct vop_write_args *);
  90 static int devfs_spec_ioctl(struct vop_ioctl_args *);
  91 static int devfs_spec_poll(struct vop_poll_args *);
  92 static int devfs_spec_kqfilter(struct vop_kqfilter_args *);
  93 static int devfs_spec_strategy(struct vop_strategy_args *);
  94 static void devfs_spec_strategy_done(struct bio *);
  95 static int devfs_spec_freeblks(struct vop_freeblks_args *);
  96 static int devfs_spec_bmap(struct vop_bmap_args *);
  97 static int devfs_spec_advlock(struct vop_advlock_args *);
  98 static void devfs_spec_getpages_iodone(struct bio *);
  99 static int devfs_spec_getpages(struct vop_getpages_args *);
 100
 101
 102 static int devfs_specf_close(struct file *);
 103 static int devfs_specf_read(struct file *, struct uio *, struct ucred *, int);
 104 static int devfs_specf_write(struct file *, struct uio *, struct ucred *, int);
 105 static int devfs_specf_stat(struct file *, struct stat *, struct ucred *);
 106 static int devfs_specf_kqfilter(struct file *, struct knote *);
 107 static int devfs_specf_poll(struct file *, int, struct ucred *);
 108 static int devfs_specf_ioctl(struct file *, u_long, caddr_t, struct ucred *);
 109
 110
 111 static __inline int sequential_heuristic(struct uio *, struct file *);
 112 extern struct lock              devfs_lock;
 113
 114 /*
 115  * devfs vnode operations for regular files
 116  */
 117 struct vop_ops devfs_vnode_norm_vops = {
 118         .vop_default =          vop_defaultop,
 119         .vop_access =           devfs_access,
 120         .vop_advlock =          DEVFS_BADOP,
 121         .vop_bmap =                     DEVFS_BADOP,
 122         .vop_close =            vop_stdclose,
 123         .vop_getattr =          devfs_getattr,
 124         .vop_inactive =         devfs_inactive,
 125         .vop_ncreate =          DEVFS_BADOP,
 126         .vop_nresolve =         devfs_nresolve,
 127         .vop_nlookupdotdot =    devfs_nlookupdotdot,
 128         .vop_nlink =            DEVFS_BADOP,
 129         .vop_nmkdir =           DEVFS_BADOP,
 130         .vop_nmknod =           DEVFS_BADOP,
 131         .vop_nremove =          devfs_nremove,
 132         .vop_nrename =          DEVFS_BADOP,
 133         .vop_nrmdir =           DEVFS_BADOP,
 134         .vop_nsymlink =         devfs_nsymlink,
 135         .vop_open =                     vop_stdopen,
 136         .vop_pathconf =         vop_stdpathconf,
 137         .vop_print =            devfs_print,
 138         .vop_read =                     DEVFS_BADOP,
 139         .vop_readdir =          devfs_readdir,
 140         .vop_readlink =         devfs_readlink,
 141         .vop_reclaim =          devfs_reclaim,
 142         .vop_setattr =          devfs_setattr,
 143         .vop_write =            DEVFS_BADOP,
 144         .vop_ioctl =            DEVFS_BADOP
 145 };
 146
 147 /*
 148  * devfs vnode operations for character devices
 149  */
 150 struct vop_ops devfs_vnode_dev_vops = {
 151         .vop_default =          vop_defaultop,
 152         .vop_access =           devfs_access,
 153         .vop_advlock =          devfs_spec_advlock,
 154         .vop_bmap =                     devfs_spec_bmap,
 155         .vop_close =            devfs_spec_close,
 156         .vop_freeblks =         devfs_spec_freeblks,
 157         .vop_fsync =            devfs_spec_fsync,
 158         .vop_getattr =          devfs_getattr,
 159         .vop_getpages =         devfs_spec_getpages,
 160         .vop_inactive =         devfs_inactive,
 161         .vop_open =                     devfs_spec_open,
 162         .vop_pathconf =         vop_stdpathconf,
 163         .vop_print =            devfs_print,
 164         .vop_poll =                     devfs_spec_poll,
 165         .vop_kqfilter =         devfs_spec_kqfilter,
 166         .vop_read =                     devfs_spec_read,
 167         .vop_readdir =          DEVFS_BADOP,
 168         .vop_readlink =         DEVFS_BADOP,
 169         .vop_reclaim =          devfs_reclaim,
 170         .vop_setattr =          devfs_setattr,
 171         .vop_strategy =         devfs_spec_strategy,
 172         .vop_write =            devfs_spec_write,
 173         .vop_ioctl =            devfs_spec_ioctl
 174 };
 175
 176 struct vop_ops *devfs_vnode_dev_vops_p = &devfs_vnode_dev_vops;
 177
 178 struct fileops devfs_dev_fileops = {
 179         .fo_read = devfs_specf_read,
 180         .fo_write = devfs_specf_write,
 181         .fo_ioctl = devfs_specf_ioctl,
 182         .fo_poll = devfs_specf_poll,
 183         .fo_kqfilter = devfs_specf_kqfilter,
 184         .fo_stat = devfs_specf_stat,
 185         .fo_close = devfs_specf_close,
 186         .fo_shutdown = nofo_shutdown
 187 };
 188
 189
 190 /*
 191  * generic entry point for unsupported operations
 192  */
 193 static int
 194 devfs_badop(struct vop_generic_args *ap)
 195 {
 196         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: specified vnode operation is not implemented (yet)\n");
 197         return (EIO);
 198 }
 199
 200
 201 static int
 202 devfs_access(struct vop_access_args *ap)
 203 {
 204         struct devfs_node *node = DEVFS_NODE(ap->a_vp);
 205         int error = 0;
 206
 207         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access() called!\n");
 208
 209         error = vop_helper_access(ap, node->uid, node->gid,
 210                                 node->mode, node->flags);
 211
 212         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access ruled over %s: %d\n", "UNKNOWN", error);
 213
 214         return error;
 215         //XXX: consider possible special cases? terminal, ...?
 216 }
 217
 218
 219 static int
 220 devfs_inactive(struct vop_inactive_args *ap)
 221 {
 222         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_inactive() called!\n");
 223
 224         /* Check if the devfs_node is not linked anymore into the topology.
 225          * If this is the case, we suggest that the vnode is recycled. */
 226         if (DEVFS_NODE(ap->a_vp)) {
 227                 if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) {
 228                         vrecycle(ap->a_vp);
 229                 }
 230         }
 231
 232         return 0;
 233 }
 234
 235
 236 static int
 237 devfs_reclaim(struct vop_reclaim_args *ap)
 238 {
 239         int locked = 0;
 240         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_reclaim() called!\n");
 241
 242         /* Check if it is locked already. if not, we acquire the devfs lock */
 243         if (!(lockstatus(&devfs_lock, curthread)) == LK_EXCLUSIVE) {
 244                 lockmgr(&devfs_lock, LK_EXCLUSIVE);
 245                 locked = 1;
 246         }
 247
 248         /* Check if the devfs_node is not linked anymore into the topology.
 249          * If this is the case, we get rid of the devfs_node. */
 250         if (DEVFS_NODE(ap->a_vp)) {
 251                 if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) {
 252                                 devfs_freep(DEVFS_NODE(ap->a_vp));
 253                                 //devfs_tracer_del_orphan(DEVFS_NODE(ap->a_vp));
 254                 }
 255
 256                 /* unlink vnode <--> devfs_node */
 257                 DEVFS_NODE(ap->a_vp)->v_node = NULL;
 258         }
 259
 260         /* If we acquired the lock, we also get rid of it */
 261         if (locked)
 262                 lockmgr(&devfs_lock, LK_RELEASE);
 263
 264         ap->a_vp->v_data = NULL;
 265         /* avoid a panic on release because of not adding it with v_associate_rdev */
 266         ap->a_vp->v_rdev = NULL;
 267
 268         return 0;
 269 }
 270
 271
 272 static int
 273 devfs_readdir(struct vop_readdir_args *ap)
 274 {
 275         struct devfs_node *node;
 276         int error2 = 0, r, error = 0;
 277
 278         int cookie_index;
 279         int ncookies;
 280         off_t *cookies;
 281         off_t saveoff;
 282
 283         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readdir() called!\n");
 284
 285         if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
 286                 return (EINVAL);
 287         if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
 288                 return (error);
 289
 290         saveoff = ap->a_uio->uio_offset;
 291
 292         if (ap->a_ncookies) {
 293                 ncookies = ap->a_uio->uio_resid / 16 + 1; /* Why / 16 ?? */
 294                 if (ncookies > 256)
 295                         ncookies = 256;
 296                 cookies = kmalloc(256 * sizeof(off_t), M_TEMP, M_WAITOK);
 297                 cookie_index = 0;
 298         } else {
 299                 ncookies = -1;
 300                 cookies = NULL;
 301                 cookie_index = 0;
 302         }
 303
 304         nanotime(&DEVFS_NODE(ap->a_vp)->atime);
 305
 306         if (saveoff == 0) {
 307                 r = vop_write_dirent(&error, ap->a_uio, DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 1, ".");
 308                 if (r)
 309                         goto done;
 310                 if (cookies)
 311                         cookies[cookie_index] = saveoff;
 312                 saveoff++;
 313                 cookie_index++;
 314                 if (cookie_index == ncookies)
 315                         goto done;
 316         }
 317
 318         if (saveoff == 1) {
 319                 if (DEVFS_NODE(ap->a_vp)->parent) {
 320                         r = vop_write_dirent(&error, ap->a_uio,
 321                                              DEVFS_NODE(ap->a_vp)->d_dir.d_ino,
 322                                              DT_DIR, 2, "..");
 323                 } else {
 324                         r = vop_write_dirent(&error, ap->a_uio,
 325                                              DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 2, "..");
 326                 }
 327                 if (r)
 328                         goto done;
 329                 if (cookies)
 330                         cookies[cookie_index] = saveoff;
 331                 saveoff++;
 332                 cookie_index++;
 333                 if (cookie_index == ncookies)
 334                         goto done;
 335         }
 336
 337         TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_vp)), link) {
 338                 if ((node->flags & DEVFS_HIDDEN) || (node->flags & DEVFS_INVISIBLE))
 339                         continue;
 340
 341                 if (node->cookie < saveoff)
 342                         continue;
 343 /*
 344                 if (skip > 0) {
 345                         skip--;
 346                         continue;
 347                 }
 348 */
 349                 saveoff = node->cookie;
 350
 351                 error2 = vop_write_dirent(&error, ap->a_uio,
 352                         node->d_dir.d_ino, node->d_dir.d_type,
 353                         node->d_dir.d_namlen, node->d_dir.d_name);
 354
 355                 if(error2)
 356                         break;
 357
 358                 saveoff++;
 359
 360                 if (cookies)
 361                         cookies[cookie_index] = node->cookie;
 362                 ++cookie_index;
 363                 if (cookie_index == ncookies)
 364                         break;
 365
 366                 //count++;
 367         }
 368
 369 done:
 370         vn_unlock(ap->a_vp);
 371
 372         ap->a_uio->uio_offset = saveoff;
 373         if (error && cookie_index == 0) {
 374                 if (cookies) {
 375                         kfree(cookies, M_TEMP);
 376                         *ap->a_ncookies = 0;
 377                         *ap->a_cookies = NULL;
 378                 }
 379         } else {
 380                 if (cookies) {
 381                         *ap->a_ncookies = cookie_index;
 382                         *ap->a_cookies = cookies;
 383                 }
 384         }
 385         return (error);
 386 }
 387
 388
 389 static int
 390 devfs_nresolve(struct vop_nresolve_args *ap)
 391 {
 392         struct devfs_node *node, *found = NULL;
 393         struct namecache *ncp;
 394         struct vnode *vp = NULL;
 395         //void *ident;
 396         int error = 0;
 397         int len;
 398         int hidden = 0;
 399
 400         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve() called!\n");
 401
 402         ncp = ap->a_nch->ncp;
 403         len = ncp->nc_nlen;
 404
 405         lockmgr(&devfs_lock, LK_EXCLUSIVE);
 406
 407         if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
 408                 (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
 409                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: ap->a_dvp is not a dir!!!\n");
 410                 cache_setvp(ap->a_nch, NULL);
 411                 goto out;
 412         }
 413
 414 search:
 415         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -search- \n");
 416         TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link) {
 417                 if (len == node->d_dir.d_namlen) {
 418                         if (!memcmp(ncp->nc_name, node->d_dir.d_name, len)) {
 419                                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: found: %s\n", ncp->nc_name);
 420                                 found = node;
 421                                 break;
 422                         }
 423                 }
 424         }
 425
 426         if (found) {
 427                 if ((found->node_type == Plink) && (found->link_target))
 428                         found = found->link_target;
 429
 430                 if (!(found->flags & DEVFS_HIDDEN))
 431                         devfs_allocv(/*ap->a_dvp->v_mount, */ &vp, found);
 432                 else
 433                         hidden = 1;
 434                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -2- \n");
 435         }
 436
 437         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -3- %c%c%c\n", ncp->nc_name[0], ncp->nc_name[1], ncp->nc_name[2]);
 438         if (vp == NULL) {
 439                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve vp==NULL \n");
 440                 /* XXX: len is int, devfs_clone expects size_t*, not int* */
 441                 if ((!hidden) && (!devfs_clone(ncp->nc_name, &len, NULL, 0, ap->a_cred))) {
 442                         goto search;
 443                 }
 444                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -4- \n");
 445                 error = ENOENT;
 446                 cache_setvp(ap->a_nch, NULL);
 447                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -5- \n");
 448                 goto out;
 449
 450         }
 451         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -6- \n");
 452         KKASSERT(vp);
 453         vn_unlock(vp);
 454         cache_setvp(ap->a_nch, vp);
 455         vrele(vp);
 456
 457         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -9- \n");
 458 out:
 459         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -end:10- failed? %s \n", (error)?"FAILED!":"OK!");
 460         lockmgr(&devfs_lock, LK_RELEASE);
 461         return error;
 462 }
 463
 464
 465 static int
 466 devfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
 467 {
 468         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nlookupdotdot() called!\n");
 469         *ap->a_vpp = NULL;
 470
 471         lockmgr(&devfs_lock, LK_EXCLUSIVE);
 472         if (DEVFS_NODE(ap->a_dvp)->parent != NULL) {
 473                 devfs_allocv(/*ap->a_dvp->v_mount, */ap->a_vpp, DEVFS_NODE(ap->a_dvp)->parent);
 474                 vn_unlock(*ap->a_vpp);
 475         }
 476         lockmgr(&devfs_lock, LK_RELEASE);
 477
 478         return ((*ap->a_vpp == NULL) ? ENOENT : 0);
 479 }
 480
 481
 482 static int
 483 devfs_getattr(struct vop_getattr_args *ap)
 484 {
 485         struct vattr *vap = ap->a_vap;
 486         struct devfs_node *node = DEVFS_NODE(ap->a_vp);
 487         int error = 0;
 488
 489         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() called for %s!\n", DEVFS_NODE(ap->a_vp)->d_dir.d_name);
 490
 491         /* start by zeroing out the attributes */
 492         VATTR_NULL(vap);
 493
 494         /* next do all the common fields */
 495         vap->va_type = ap->a_vp->v_type;
 496         vap->va_mode = node->mode;
 497         vap->va_fileid = DEVFS_NODE(ap->a_vp)->d_dir.d_ino ;
 498         vap->va_flags = 0; //what should this be?
 499         vap->va_blocksize = DEV_BSIZE;
 500         vap->va_bytes = vap->va_size = sizeof(struct devfs_node);
 501
 502         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() check dev %s!\n", (DEVFS_NODE(ap->a_vp)->d_dev)?(DEVFS_NODE(ap->a_vp)->d_dev->si_name):"Not a device");
 503
 504         vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 505
 506
 507         vap->va_atime = node->atime;
 508         vap->va_mtime = node->mtime;
 509         vap->va_ctime = node->ctime;
 510
 511         vap->va_nlink = 1; /* number of references to file */
 512
 513         vap->va_uid = node->uid;
 514         vap->va_gid = node->gid;
 515
 516         vap->va_rmajor = 0;
 517         vap->va_rminor = 0;
 518
 519         if ((DEVFS_NODE(ap->a_vp)->node_type == Pdev) &&
 520                 (DEVFS_NODE(ap->a_vp)->d_dev))  {
 521                 devfs_debug(DEVFS_DEBUG_DEBUG, "getattr: dev is: %p\n", DEVFS_NODE(ap->a_vp)->d_dev);
 522                 reference_dev(DEVFS_NODE(ap->a_vp)->d_dev);
 523                 vap->va_fsid = dev2udev(DEVFS_NODE(ap->a_vp)->d_dev);
 524                 vap->va_rminor = DEVFS_NODE(ap->a_vp)->d_dev->si_uminor;
 525                 release_dev(DEVFS_NODE(ap->a_vp)->d_dev);
 526         }
 527
 528         /* For a softlink the va_size is the length of the softlink */
 529         if (DEVFS_NODE(ap->a_vp)->symlink_name != 0) {
 530                 vap->va_size = DEVFS_NODE(ap->a_vp)->symlink_namelen;
 531         }
 532         nanotime(&node->atime);
 533         return (error); //XXX: set error usefully
 534 }
 535
 536
 537 static int
 538 devfs_setattr(struct vop_setattr_args *ap)
 539 {
 540         struct devfs_node *node;
 541         struct vattr *vap;
 542         int error = 0;
 543
 544
 545         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr() called!\n");
 546         lockmgr(&devfs_lock, LK_EXCLUSIVE);
 547
 548         vap = ap->a_vap;
 549         node = DEVFS_NODE(ap->a_vp);
 550
 551         if (vap->va_uid != (uid_t)VNOVAL) {
 552                 if ((ap->a_cred->cr_uid != node->uid) &&
 553                         (!groupmember(node->gid, ap->a_cred))) {
 554                         error = priv_check(curthread, PRIV_VFS_CHOWN);
 555                         if (error) {
 556                                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -1-\n");
 557                                 goto out;
 558                         }
 559                 }
 560                 node->uid = vap->va_uid;
 561         }
 562
 563         if (vap->va_gid != (uid_t)VNOVAL) {
 564                 if ((ap->a_cred->cr_uid != node->uid) &&
 565                         (!groupmember(node->gid, ap->a_cred))) {
 566                         error = priv_check(curthread, PRIV_VFS_CHOWN);
 567                         if (error) {
 568                                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -2-\n");
 569                                 goto out;
 570                         }
 571                 }
 572                 node->gid = vap->va_gid;
 573         }
 574
 575         if (vap->va_mode != (mode_t)VNOVAL) {
 576                 if (ap->a_cred->cr_uid != node->uid) {
 577                         error = priv_check(curthread, PRIV_VFS_ADMIN);
 578                         if (error) {
 579                                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -3-\n");
 580                                 goto out;
 581                         }
 582                 }
 583                 node->mode = vap->va_mode;
 584         }
 585
 586 out:
 587         nanotime(&node->mtime);
 588         lockmgr(&devfs_lock, LK_RELEASE);
 589         return error;
 590 }
 591
 592
 593 static int
 594 devfs_readlink(struct vop_readlink_args *ap)
 595 {
 596         struct devfs_node *node = DEVFS_NODE(ap->a_vp);
 597
 598         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readlink()  called!\n");
 599
 600         return (uiomove(node->symlink_name, node->symlink_namelen, ap->a_uio));
 601 }
 602
 603
 604 static int
 605 devfs_print(struct vop_print_args *ap)
 606 {
 607         //struct devfs_node *node = DEVFS_NODE(ap->a_vp);
 608
 609         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_print() called!\n");
 610
 611         //XXX: print some useful debugging about node.
 612         return (0);
 613 }
 614
 615
 616 static int
 617 devfs_nsymlink(struct vop_nsymlink_args *ap)
 618 {
 619         size_t targetlen = strlen(ap->a_target);
 620
 621         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink() called!\n");
 622
 623         ap->a_vap->va_type = VLNK;
 624
 625         if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
 626                 (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
 627                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink: ap->a_dvp is not a dir!!!\n");
 628                 goto out;
 629         }
 630         lockmgr(&devfs_lock, LK_EXCLUSIVE);
 631         devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Plink,
 632                                 ap->a_nch->ncp->nc_name, DEVFS_NODE(ap->a_dvp), NULL);
 633
 634         if (*ap->a_vpp) {
 635                 DEVFS_NODE(*ap->a_vpp)->flags |= DEVFS_USER_CREATED;
 636
 637                 DEVFS_NODE(*ap->a_vpp)->symlink_namelen = targetlen;
 638                 DEVFS_NODE(*ap->a_vpp)->symlink_name = kmalloc(targetlen + 1, M_DEVFS, M_WAITOK);
 639                 memcpy(DEVFS_NODE(*ap->a_vpp)->symlink_name, ap->a_target, targetlen);
 640                 DEVFS_NODE(*ap->a_vpp)->symlink_name[targetlen] = '\0';
 641                 cache_setunresolved(ap->a_nch);
 642                 //problematic to use cache_* inside lockmgr() ? Probably not...
 643                 cache_setvp(ap->a_nch, *ap->a_vpp);
 644         }
 645         lockmgr(&devfs_lock, LK_RELEASE);
 646 out:
 647         return ((*ap->a_vpp == NULL) ? ENOTDIR : 0);
 648
 649 }
 650
 651
 652 static int
 653 devfs_nremove(struct vop_nremove_args *ap)
 654 {
 655         struct devfs_node *node;
 656         struct namecache *ncp;
 657         //struct vnode *vp = NULL;
 658         int error = ENOENT;
 659
 660         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove() called!\n");
 661
 662         ncp = ap->a_nch->ncp;
 663
 664         lockmgr(&devfs_lock, LK_EXCLUSIVE);
 665
 666         if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
 667                 (DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
 668                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove: ap->a_dvp is not a dir!!!\n");
 669                 goto out;
 670         }
 671
 672         TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link)     {
 673                 if (ncp->nc_nlen == node->d_dir.d_namlen) {
 674                         if (!memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen)) {
 675                                 // allow only removal of user created stuff (e.g. symlinks)
 676                                 if ((node->flags & DEVFS_USER_CREATED) == 0) {
 677                                         error = EPERM;
 678                                         goto out;
 679                                 } else {
 680                                         if (node->v_node)
 681                                                 cache_inval_vp(node->v_node, CINV_DESTROY);
 682
 683                                         devfs_unlinkp(node);
 684                                         error = 0;
 685                                         break;
 686                                 }
 687                         }
 688                 }
 689         }
 690
 691         cache_setunresolved(ap->a_nch);
 692         cache_setvp(ap->a_nch, NULL);
 693         //cache_inval_vp(node->v_node, CINV_DESTROY);
 694
 695 out:
 696         lockmgr(&devfs_lock, LK_RELEASE);
 697         //vrele(ap->a_dvp);
 698         //vput(ap->a_dvp);
 699         return error;
 700 }
 701
 702
 703 static int
 704 devfs_spec_open(struct vop_open_args *ap)
 705 {
 706         struct vnode *vp = ap->a_vp;
 707         cdev_t dev, ndev = NULL;
 708         struct devfs_node *node = NULL;
 709         int error = 0;
 710         size_t len;
 711
 712         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called\n");
 713
 714         if (DEVFS_NODE(vp)) {
 715                 if (DEVFS_NODE(vp)->d_dev == NULL)
 716                         return ENXIO;
 717         }
 718
 719         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1-\n");
 720
 721         if ((dev = vp->v_rdev) == NULL)
 722                 return ENXIO;
 723
 724         if (DEVFS_NODE(vp) && ap->a_fp) {
 725                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n");
 726                 lockmgr(&devfs_lock, LK_EXCLUSIVE);
 727                 len = DEVFS_NODE(vp)->d_dir.d_namlen;
 728                 if (!(devfs_clone(DEVFS_NODE(vp)->d_dir.d_name, &len, &ndev, 1, ap->a_cred))) {
 729                         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.2- |%s|\n", ndev->si_name);
 730
 731                         dev = ndev;
 732                         reference_dev(dev);
 733                         devfs_link_dev(dev);
 734                         node = devfs_create_device_node(DEVFS_MNTDATA(vp->v_mount)->root_node, dev, NULL, NULL);
 735                         //node = devfs_allocp(Pdev, ndev->si_name, DEVFS_NODE(vp)->parent, vp->v_mount, dev);
 736
 737                         devfs_debug(DEVFS_DEBUG_DEBUG, "parent here is: %s, node is: |%s|\n", (DEVFS_NODE(vp)->parent->node_type == Proot)?"ROOT!":DEVFS_NODE(vp)->parent->d_dir.d_name, node->d_dir.d_name);
 738                         devfs_debug(DEVFS_DEBUG_DEBUG, "test: %s\n", ((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(DEVFS_NODE(vp)->parent), devfs_node_head)))->d_dir.d_name);
 739
 740                         node->flags |= DEVFS_CLONED;
 741                         devfs_allocv(&vp, node);
 742
 743                         ap->a_vp = vp;
 744
 745                         //XXX: propagate to other devfs mounts?
 746                 }
 747                 lockmgr(&devfs_lock, LK_RELEASE);
 748         }
 749
 750         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called on %s! \n", dev->si_name);
 751         /*
 752          * Make this field valid before any I/O in ->d_open
 753          */
 754         if (!dev->si_iosize_max)
 755                 dev->si_iosize_max = DFLTPHYS;
 756
 757         if (dev_dflags(dev) & D_TTY)
 758                 vp->v_flag |= VISTTY;
 759
 760         vn_unlock(vp);
 761         error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred);
 762         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 763
 764         if (error) {
 765                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() error out: %x\n", error);
 766                 if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED))
 767                         vput(vp);
 768                 return error;
 769         }
 770
 771
 772         if (dev_dflags(dev) & D_TTY) {
 773                 if (dev->si_tty) {
 774                         struct tty *tp;
 775                         tp = dev->si_tty;
 776                         if (!tp->t_stop) {
 777                                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: no t_stop\n");
 778                                 tp->t_stop = nottystop;
 779                         }
 780                 }
 781         }
 782
 783
 784         if (vn_isdisk(vp, NULL)) {
 785                 if (!dev->si_bsize_phys)
 786                         dev->si_bsize_phys = DEV_BSIZE;
 787                 vinitvmio(vp, IDX_TO_OFF(INT_MAX));
 788         }
 789
 790         vop_stdopen(ap);
 791         if (DEVFS_NODE(vp))
 792                 nanotime(&DEVFS_NODE(vp)->atime);
 793
 794         if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED))
 795                 vn_unlock(vp);
 796
 797         /* Ugly pty magic, to make pty devices appear once they are opened */
 798         if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY))
 799                 DEVFS_NODE(vp)->flags &= ~DEVFS_INVISIBLE;
 800
 801         if (ap->a_fp) {
 802                 ap->a_fp->f_type = DTYPE_VNODE;
 803                 ap->a_fp->f_flag = ap->a_mode & FMASK;
 804                 ap->a_fp->f_ops = &devfs_dev_fileops;
 805                 ap->a_fp->f_data = vp;
 806         }
 807
 808         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -end:3-\n");
 809
 810         return 0;
 811 }
 812
 813
 814 static int
 815 devfs_spec_close(struct vop_close_args *ap)
 816 {
 817         struct proc *p = curproc;
 818         struct vnode *vp = ap->a_vp;
 819         cdev_t dev = vp->v_rdev;
 820         int error = 0;
 821         int needrelock;
 822
 823         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() called on %s! \n", dev->si_name);
 824
 825         /*
 826          * A couple of hacks for devices and tty devices.  The
 827          * vnode ref count cannot be used to figure out the
 828          * last close, but we can use v_opencount now that
 829          * revoke works properly.
 830          *
 831          * Detect the last close on a controlling terminal and clear
 832          * the session (half-close).
 833          */
 834         if (dev)
 835                 reference_dev(dev);
 836
 837         if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) {
 838                 p->p_session->s_ttyvp = NULL;
 839                 vrele(vp);
 840         }
 841
 842         /*
 843          * Vnodes can be opened and closed multiple times.  Do not really
 844          * close the device unless (1) it is being closed forcibly,
 845          * (2) the device wants to track closes, or (3) this is the last
 846          * vnode doing its last close on the device.
 847          *
 848          * XXX the VXLOCK (force close) case can leave vnodes referencing
 849          * a closed device.  This might not occur now that our revoke is
 850          * fixed.
 851          */
 852         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n");
 853         if (dev && ((vp->v_flag & VRECLAIMED) ||
 854             (dev_dflags(dev) & D_TRACKCLOSE) ||
 855             (vp->v_opencount == 1))) {
 856                 needrelock = 0;
 857                 if (vn_islocked(vp)) {
 858                         needrelock = 1;
 859                         vn_unlock(vp);
 860                 }
 861                 error = dev_dclose(dev, ap->a_fflag, S_IFCHR);
 862                 if (DEVFS_NODE(vp) && (DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED) {
 863                         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close: last of the cloned ones, so delete node %s\n", dev->si_name);
 864                         devfs_unlinkp(DEVFS_NODE(vp));
 865                         devfs_freep(DEVFS_NODE(vp));
 866                         devfs_unlink_dev(dev);
 867                         release_dev(dev);
 868                         devfs_destroy_cdev(dev);
 869                 }
 870                 /* Ugly pty magic, to make pty devices disappear again once they are closed */
 871                 if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY))
 872                         DEVFS_NODE(vp)->flags |= DEVFS_INVISIBLE;
 873
 874                 if (needrelock)
 875                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 876         } else {
 877                 error = 0;
 878         }
 879         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n");
 880         /*
 881          * Track the actual opens and closes on the vnode.  The last close
 882          * disassociates the rdev.  If the rdev is already disassociated or the
 883          * opencount is already 0, the vnode might have been revoked and no
 884          * further opencount tracking occurs.
 885          */
 886         if (dev) {
 887                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3- \n");
 888                 if (vp->v_opencount == 1) {
 889                         //vp->v_rdev = 0;
 890                         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3.5- \n");
 891                 }
 892                 release_dev(dev);
 893         }
 894         if (vp->v_opencount > 0) {
 895                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -4- \n");
 896                 vop_stdclose(ap);
 897                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -5- \n");
 898         }
 899
 900         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -end:6- \n");
 901         return(error);
 902
 903 }
 904
 905
 906 static int
 907 devfs_specf_close(struct file *fp)
 908 {
 909         int error;
 910         struct vnode *vp = (struct vnode *)fp->f_data;
 911
 912         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_close() called! \n");
 913         get_mplock();
 914         fp->f_ops = &badfileops;
 915
 916         error = vn_close(vp, fp->f_flag);
 917         rel_mplock();
 918
 919         return (error);
 920 }
 921
 922
 923 /*
 924  * Device-optimized file table vnode read routine.
 925  *
 926  * This bypasses the VOP table and talks directly to the device.  Most
 927  * filesystems just route to specfs and can make this optimization.
 928  *
 929  * MPALMOSTSAFE - acquires mplock
 930  */
 931 static int
 932 devfs_specf_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
 933 {
 934         struct vnode *vp;
 935         int ioflag;
 936         int error;
 937         cdev_t dev;
 938
 939         get_mplock();
 940         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! \n");
 941         KASSERT(uio->uio_td == curthread,
 942                 ("uio_td %p is not td %p", uio->uio_td, curthread));
 943
 944         vp = (struct vnode *)fp->f_data;
 945         if (vp == NULL || vp->v_type == VBAD) {
 946                 error = EBADF;
 947                 goto done;
 948         }
 949
 950         if ((dev = vp->v_rdev) == NULL) {
 951                 error = EBADF;
 952                 goto done;
 953         }
 954         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! for dev %s\n", dev->si_name);
 955
 956         reference_dev(dev);
 957
 958         if (uio->uio_resid == 0) {
 959                 error = 0;
 960                 goto done;
 961         }
 962         if ((flags & O_FOFFSET) == 0)
 963                 uio->uio_offset = fp->f_offset;
 964
 965         ioflag = 0;
 966         if (flags & O_FBLOCKING) {
 967                 /* ioflag &= ~IO_NDELAY; */
 968         } else if (flags & O_FNONBLOCKING) {
 969                 ioflag |= IO_NDELAY;
 970         } else if (fp->f_flag & FNONBLOCK) {
 971                 ioflag |= IO_NDELAY;
 972         }
 973         if (flags & O_FBUFFERED) {
 974                 /* ioflag &= ~IO_DIRECT; */
 975         } else if (flags & O_FUNBUFFERED) {
 976                 ioflag |= IO_DIRECT;
 977         } else if (fp->f_flag & O_DIRECT) {
 978                 ioflag |= IO_DIRECT;
 979         }
 980         ioflag |= sequential_heuristic(uio, fp);
 981
 982         error = dev_dread(dev, uio, ioflag);
 983
 984         release_dev(dev);
 985         if (DEVFS_NODE(vp))
 986                 nanotime(&DEVFS_NODE(vp)->atime);
 987         if ((flags & O_FOFFSET) == 0)
 988                 fp->f_offset = uio->uio_offset;
 989         fp->f_nextoff = uio->uio_offset;
 990 done:
 991         rel_mplock();
 992         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read finished\n");
 993         return (error);
 994 }
 995
 996
 997 static int
 998 devfs_specf_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
 999 {
1000         struct vnode *vp;
1001         int ioflag;
1002         int error;
1003         cdev_t dev;
1004
1005         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! \n");
1006         get_mplock();
1007         KASSERT(uio->uio_td == curthread,
1008                 ("uio_td %p is not p %p", uio->uio_td, curthread));
1009
1010         vp = (struct vnode *)fp->f_data;
1011         if (vp == NULL || vp->v_type == VBAD) {
1012                 error = EBADF;
1013                 goto done;
1014         }
1015         if (vp->v_type == VREG)
1016                 bwillwrite(uio->uio_resid);
1017         vp = (struct vnode *)fp->f_data;
1018
1019         if ((dev = vp->v_rdev) == NULL) {
1020                 error = EBADF;
1021                 goto done;
1022         }
1023         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! for dev %s\n", dev->si_name);
1024         reference_dev(dev);
1025
1026         if ((flags & O_FOFFSET) == 0)
1027                 uio->uio_offset = fp->f_offset;
1028
1029         ioflag = IO_UNIT;
1030         if (vp->v_type == VREG &&
1031            ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
1032                 ioflag |= IO_APPEND;
1033         }
1034
1035         if (flags & O_FBLOCKING) {
1036                 /* ioflag &= ~IO_NDELAY; */
1037         } else if (flags & O_FNONBLOCKING) {
1038                 ioflag |= IO_NDELAY;
1039         } else if (fp->f_flag & FNONBLOCK) {
1040                 ioflag |= IO_NDELAY;
1041         }
1042         if (flags & O_FBUFFERED) {
1043                 /* ioflag &= ~IO_DIRECT; */
1044         } else if (flags & O_FUNBUFFERED) {
1045                 ioflag |= IO_DIRECT;
1046         } else if (fp->f_flag & O_DIRECT) {
1047                 ioflag |= IO_DIRECT;
1048         }
1049         if (flags & O_FASYNCWRITE) {
1050                 /* ioflag &= ~IO_SYNC; */
1051         } else if (flags & O_FSYNCWRITE) {
1052                 ioflag |= IO_SYNC;
1053         } else if (fp->f_flag & O_FSYNC) {
1054                 ioflag |= IO_SYNC;
1055         }
1056
1057         if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
1058                 ioflag |= IO_SYNC;
1059         ioflag |= sequential_heuristic(uio, fp);
1060
1061         error = dev_dwrite(dev, uio, ioflag);
1062
1063         release_dev(dev);
1064         if (DEVFS_NODE(vp))
1065                 nanotime(&DEVFS_NODE(vp)->mtime);
1066
1067         if ((flags & O_FOFFSET) == 0)
1068                 fp->f_offset = uio->uio_offset;
1069         fp->f_nextoff = uio->uio_offset;
1070 done:
1071         rel_mplock();
1072         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write done\n");
1073         return (error);
1074 }
1075
1076
1077 static int
1078 devfs_specf_stat(struct file *fp, struct stat *sb, struct ucred *cred)
1079 {
1080         struct vnode *vp;
1081         int error;
1082
1083         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_stat() called\n");
1084
1085         get_mplock();
1086         vp = (struct vnode *)fp->f_data;
1087         error = vn_stat(vp, sb, cred);
1088         if (error) {
1089                 rel_mplock();
1090                 return (error);
1091         }
1092
1093         struct vattr vattr;
1094         struct vattr *vap;
1095         u_short mode;
1096         cdev_t dev;
1097
1098         vap = &vattr;
1099         error = VOP_GETATTR(vp, vap);
1100         if (error) {
1101                 rel_mplock();
1102                 return (error);
1103         }
1104
1105         /*
1106          * Zero the spare stat fields
1107          */
1108         sb->st_lspare = 0;
1109         sb->st_qspare = 0;
1110
1111         /*
1112          * Copy from vattr table ... or not in case it's a cloned device
1113          */
1114         if (vap->va_fsid != VNOVAL)
1115                 sb->st_dev = vap->va_fsid;
1116         else
1117                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
1118
1119         sb->st_ino = vap->va_fileid;
1120
1121         mode = vap->va_mode;
1122         mode |= S_IFCHR;
1123         sb->st_mode = mode;
1124
1125         if (vap->va_nlink > (nlink_t)-1)
1126                 sb->st_nlink = (nlink_t)-1;
1127         else
1128                 sb->st_nlink = vap->va_nlink;
1129         sb->st_uid = vap->va_uid;
1130         sb->st_gid = vap->va_gid;
1131         sb->st_rdev = 0;
1132         sb->st_size = vap->va_size;
1133         sb->st_atimespec = vap->va_atime;
1134         sb->st_mtimespec = vap->va_mtime;
1135         sb->st_ctimespec = vap->va_ctime;
1136
1137         /*
1138          * A VCHR and VBLK device may track the last access and last modified
1139          * time independantly of the filesystem.  This is particularly true
1140          * because device read and write calls may bypass the filesystem.
1141          */
1142         if (vp->v_type == VCHR || vp->v_type == VBLK) {
1143                 dev = vp->v_rdev;
1144                 if (dev != NULL) {
1145                         if (dev->si_lastread) {
1146                                 sb->st_atimespec.tv_sec = dev->si_lastread;
1147                                 sb->st_atimespec.tv_nsec = 0;
1148                         }
1149                         if (dev->si_lastwrite) {
1150                                 sb->st_atimespec.tv_sec = dev->si_lastwrite;
1151                                 sb->st_atimespec.tv_nsec = 0;
1152                         }
1153                 }
1154         }
1155
1156         /*
1157          * According to www.opengroup.org, the meaning of st_blksize is
1158          *   "a filesystem-specific preferred I/O block size for this
1159          *    object.  In some filesystem types, this may vary from file
1160          *    to file"
1161          * Default to PAGE_SIZE after much discussion.
1162          */
1163
1164         sb->st_blksize = PAGE_SIZE;
1165
1166         sb->st_flags = vap->va_flags;
1167
1168         error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0);
1169         if (error)
1170                 sb->st_gen = 0;
1171         else
1172                 sb->st_gen = (u_int32_t)vap->va_gen;
1173
1174         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
1175         sb->st_fsmid = vap->va_fsmid;
1176
1177         rel_mplock();
1178         return (0);
1179 }
1180
1181
1182 static int
1183 devfs_specf_kqfilter(struct file *fp, struct knote *kn)
1184 {
1185         struct vnode *vp;
1186         //int ioflag;
1187         int error;
1188         cdev_t dev;
1189
1190         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_kqfilter() called! \n");
1191
1192         get_mplock();
1193
1194         vp = (struct vnode *)fp->f_data;
1195         if (vp == NULL || vp->v_type == VBAD) {
1196                 error = EBADF;
1197                 goto done;
1198         }
1199
1200         if ((dev = vp->v_rdev) == NULL) {
1201                 error = EBADF;
1202                 goto done;
1203         }
1204         reference_dev(dev);
1205
1206         error = dev_dkqfilter(dev, kn);
1207
1208         release_dev(dev);
1209
1210         if (DEVFS_NODE(vp))
1211                 nanotime(&DEVFS_NODE(vp)->atime);
1212 done:
1213         rel_mplock();
1214         return (error);
1215 }
1216
1217
1218 static int
1219 devfs_specf_poll(struct file *fp, int events, struct ucred *cred)
1220 {
1221         struct vnode *vp;
1222         //int ioflag;
1223         int error;
1224         cdev_t dev;
1225
1226         //devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_poll() called! \n");
1227
1228         get_mplock();
1229
1230         vp = (struct vnode *)fp->f_data;
1231         if (vp == NULL || vp->v_type == VBAD) {
1232                 error = EBADF;
1233                 goto done;
1234         }
1235
1236         if ((dev = vp->v_rdev) == NULL) {
1237                 error = EBADF;
1238                 goto done;
1239         }
1240         reference_dev(dev);
1241         error = dev_dpoll(dev, events);
1242
1243         release_dev(dev);
1244
1245         if (DEVFS_NODE(vp))
1246                 nanotime(&DEVFS_NODE(vp)->atime);
1247 done:
1248         rel_mplock();
1249         return (error);
1250 }
1251
1252
1253 /*
1254  * MPALMOSTSAFE - acquires mplock
1255  */
1256 static int
1257 devfs_specf_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred)
1258 {
1259         struct vnode *vp = ((struct vnode *)fp->f_data);
1260         struct vnode *ovp;
1261         //struct vattr vattr;
1262         cdev_t  dev;
1263         int error;
1264         struct fiodname_args *name_args;
1265         size_t namlen;
1266         const char *name;
1267
1268         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! \n");
1269
1270         get_mplock();
1271
1272         if ((dev = vp->v_rdev) == NULL) {
1273                 error = EBADF;          /* device was revoked */
1274                 goto out;
1275         }
1276         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! for dev %s\n", dev->si_name);
1277
1278         if (!(dev_dflags(dev) & D_TTY))
1279                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called on %s! com is: %x\n", dev->si_name, com);
1280
1281         if (com == FIODTYPE) {
1282                 *(int *)data = dev_dflags(dev) & D_TYPEMASK;
1283                 error = 0;
1284                 goto out;
1285         } else if (com == FIODNAME) {
1286                 name_args = (struct fiodname_args *)data;
1287                 name = dev->si_name;
1288                 namlen = strlen(name) + 1;
1289
1290                 devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl, got: FIODNAME for %s\n", name);
1291
1292                 if (namlen <= name_args->len)
1293                         error = copyout(dev->si_name, name_args->name, namlen);
1294                 else
1295                         error = EINVAL;
1296
1297                 //name_args->len = namlen; //need _IOWR to enable this
1298                 devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl stuff: error: %d\n", error);
1299                 goto out;
1300         }
1301         reference_dev(dev);
1302         error = dev_dioctl(dev, com, data, fp->f_flag, ucred);
1303         release_dev(dev);
1304         if (DEVFS_NODE(vp)) {
1305                 nanotime(&DEVFS_NODE(vp)->atime);
1306                 nanotime(&DEVFS_NODE(vp)->mtime);
1307         }
1308
1309         if (com == TIOCSCTTY)
1310                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: got TIOCSCTTY on %s\n", dev->si_name);
1311         if (error == 0 && com == TIOCSCTTY) {
1312                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: dealing with TIOCSCTTY on %s\n", dev->si_name);
1313                 struct proc *p = curthread->td_proc;
1314                 struct session *sess;
1315                         if (p == NULL) {
1316                         error = ENOTTY;
1317                         goto out;
1318                 }
1319                 sess = p->p_session;
1320                 /* Do nothing if reassigning same control tty */
1321                 if (sess->s_ttyvp == vp) {
1322                         error = 0;
1323                         goto out;
1324                 }
1325                         /* Get rid of reference to old control tty */
1326                 ovp = sess->s_ttyvp;
1327                 vref(vp);
1328                 sess->s_ttyvp = vp;
1329                 if (ovp)
1330                         vrele(ovp);
1331         }
1332
1333 out:
1334         rel_mplock();
1335         devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() finished! \n");
1336         return (error);
1337 }
1338
1339
1340 static int
1341 devfs_spec_fsync(struct vop_fsync_args *ap)
1342 {
1343         struct vnode *vp = ap->a_vp;
1344         int error;
1345
1346         if (!vn_isdisk(vp, NULL))
1347                 return (0);
1348
1349         /*
1350          * Flush all dirty buffers associated with a block device.
1351          */
1352         error = vfsync(vp, ap->a_waitfor, 10000, NULL, NULL);
1353         return (error);
1354 }
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375 static int
1376 devfs_spec_read(struct vop_read_args *ap)
1377 {
1378         struct vnode *vp;
1379         struct uio *uio;
1380         cdev_t dev;
1381         int error;
1382
1383         vp = ap->a_vp;
1384         dev = vp->v_rdev;
1385         uio = ap->a_uio;
1386
1387         if (dev == NULL)                /* device was revoked */
1388                 return (EBADF);
1389         if (uio->uio_resid == 0)
1390                 return (0);
1391
1392         vn_unlock(vp);
1393         error = dev_dread(dev, uio, ap->a_ioflag);
1394         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1395
1396         if (DEVFS_NODE(vp))
1397                 nanotime(&DEVFS_NODE(vp)->atime);
1398
1399         return (error);
1400 }
1401
1402 /*
1403  * Vnode op for write
1404  *
1405  * spec_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1406  *            struct ucred *a_cred)
1407  */
1408 /* ARGSUSED */
1409 static int
1410 devfs_spec_write(struct vop_write_args *ap)
1411 {
1412         struct vnode *vp;
1413         struct uio *uio;
1414         cdev_t dev;
1415         int error;
1416
1417         vp = ap->a_vp;
1418         dev = vp->v_rdev;
1419         uio = ap->a_uio;
1420
1421         KKASSERT(uio->uio_segflg != UIO_NOCOPY);
1422
1423         if (dev == NULL)                /* device was revoked */
1424                 return (EBADF);
1425
1426         vn_unlock(vp);
1427         error = dev_dwrite(dev, uio, ap->a_ioflag);
1428         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1429
1430         if (DEVFS_NODE(vp))
1431                 nanotime(&DEVFS_NODE(vp)->mtime);
1432
1433         return (error);
1434 }
1435
1436 /*
1437  * Device ioctl operation.
1438  *
1439  * spec_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data,
1440  *            int a_fflag, struct ucred *a_cred)
1441  */
1442 /* ARGSUSED */
1443 static int
1444 devfs_spec_ioctl(struct vop_ioctl_args *ap)
1445 {
1446         cdev_t dev;
1447         struct vnode *vp = ap->a_vp;
1448
1449         if ((dev = vp->v_rdev) == NULL)
1450                 return (EBADF);         /* device was revoked */
1451         if ( ap->a_command == TIOCSCTTY )
1452                 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_*SPEC*_ioctl: got TIOCSCTTY\n");
1453
1454         if (DEVFS_NODE(vp)) {
1455                 nanotime(&DEVFS_NODE(vp)->atime);
1456                 nanotime(&DEVFS_NODE(vp)->mtime);
1457         }
1458
1459         return (dev_dioctl(dev, ap->a_command, ap->a_data,
1460                     ap->a_fflag, ap->a_cred));
1461 }
1462
1463 /*
1464  * spec_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred)
1465  */
1466 /* ARGSUSED */
1467 static int
1468 devfs_spec_poll(struct vop_poll_args *ap)
1469 {
1470         cdev_t dev;
1471         struct vnode *vp = ap->a_vp;
1472
1473         if ((dev = vp->v_rdev) == NULL)
1474                 return (EBADF);         /* device was revoked */
1475
1476         if (DEVFS_NODE(vp))
1477                 nanotime(&DEVFS_NODE(vp)->atime);
1478
1479         return (dev_dpoll(dev, ap->a_events));
1480 }
1481
1482 /*
1483  * spec_kqfilter(struct vnode *a_vp, struct knote *a_kn)
1484  */
1485 /* ARGSUSED */
1486 static int
1487 devfs_spec_kqfilter(struct vop_kqfilter_args *ap)
1488 {
1489         cdev_t dev;
1490         struct vnode *vp = ap->a_vp;
1491
1492         if ((dev = vp->v_rdev) == NULL)
1493                 return (EBADF);         /* device was revoked */
1494
1495         if (DEVFS_NODE(vp))
1496                 nanotime(&DEVFS_NODE(vp)->atime);
1497
1498         return (dev_dkqfilter(dev, ap->a_kn));
1499 }
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542 /*
1543  * Convert a vnode strategy call into a device strategy call.  Vnode strategy
1544  * calls are not limited to device DMA limits so we have to deal with the
1545  * case.
1546  *
1547  * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
1548  */
1549 static int
1550 devfs_spec_strategy(struct vop_strategy_args *ap)
1551 {
1552         struct bio *bio = ap->a_bio;
1553         struct buf *bp = bio->bio_buf;
1554         struct buf *nbp;
1555         struct vnode *vp;
1556         struct mount *mp;
1557         int chunksize;
1558         int maxiosize;
1559
1560         if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
1561                 buf_start(bp);
1562
1563         /*
1564          * Collect statistics on synchronous and asynchronous read
1565          * and write counts for disks that have associated filesystems.
1566          */
1567         vp = ap->a_vp;
1568         KKASSERT(vp->v_rdev != NULL);   /* XXX */
1569         if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
1570                 if (bp->b_cmd == BUF_CMD_READ) {
1571                         //XXX: no idea what has changed here...
1572                         if (bp->b_flags & BIO_SYNC)
1573                                 mp->mnt_stat.f_syncreads++;
1574                         else
1575                                 mp->mnt_stat.f_asyncreads++;
1576                 } else {
1577                         if (bp->b_flags & BIO_SYNC)
1578                                 mp->mnt_stat.f_syncwrites++;
1579                         else
1580                                 mp->mnt_stat.f_asyncwrites++;
1581                 }
1582         }
1583
1584         /*
1585          * Device iosize limitations only apply to read and write.  Shortcut
1586          * the I/O if it fits.
1587          */
1588         if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) {
1589                 devfs_debug(DEVFS_DEBUG_DEBUG, "%s: si_iosize_max not set!\n", dev_dname(vp->v_rdev));
1590                 maxiosize = MAXPHYS;
1591         }
1592 #if SPEC_CHAIN_DEBUG & 2
1593         maxiosize = 4096;
1594 #endif
1595         if (bp->b_bcount <= maxiosize ||
1596             (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
1597                 dev_dstrategy_chain(vp->v_rdev, bio);
1598                 return (0);
1599         }
1600
1601         /*
1602          * Clone the buffer and set up an I/O chain to chunk up the I/O.
1603          */
1604         nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
1605         initbufbio(nbp);
1606         buf_dep_init(nbp);
1607         BUF_LOCKINIT(nbp);
1608         BUF_LOCK(nbp, LK_EXCLUSIVE);
1609         BUF_KERNPROC(nbp);
1610         nbp->b_vp = vp;
1611         nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
1612         nbp->b_data = bp->b_data;
1613         nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1614         nbp->b_bio1.bio_offset = bio->bio_offset;
1615         nbp->b_bio1.bio_caller_info1.ptr = bio;
1616
1617         /*
1618          * Start the first transfer
1619          */
1620         if (vn_isdisk(vp, NULL))
1621                 chunksize = vp->v_rdev->si_bsize_phys;
1622         else
1623                 chunksize = DEV_BSIZE;
1624         chunksize = maxiosize / chunksize * chunksize;
1625 #if SPEC_CHAIN_DEBUG & 1
1626         devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy chained I/O chunksize=%d\n", chunksize);
1627 #endif
1628         nbp->b_cmd = bp->b_cmd;
1629         nbp->b_bcount = chunksize;
1630         nbp->b_bufsize = chunksize;     /* used to detect a short I/O */
1631         nbp->b_bio1.bio_caller_info2.index = chunksize;
1632
1633 #if SPEC_CHAIN_DEBUG & 1
1634         devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
1635                 bp, 0, bp->b_bcount, nbp->b_bcount);
1636 #endif
1637
1638         dev_dstrategy(vp->v_rdev, &nbp->b_bio1);
1639
1640         if (DEVFS_NODE(vp)) {
1641                 nanotime(&DEVFS_NODE(vp)->atime);
1642                 nanotime(&DEVFS_NODE(vp)->mtime);
1643         }
1644
1645         return (0);
1646 }
1647
1648 /*
1649  * Chunked up transfer completion routine - chain transfers until done
1650  */
1651 static
1652 void
1653 devfs_spec_strategy_done(struct bio *nbio)
1654 {
1655         struct buf *nbp = nbio->bio_buf;
1656         struct bio *bio = nbio->bio_caller_info1.ptr;   /* original bio */
1657         struct buf *bp = bio->bio_buf;                  /* original bp */
1658         int chunksize = nbio->bio_caller_info2.index;   /* chunking */
1659         int boffset = nbp->b_data - bp->b_data;
1660
1661         if (nbp->b_flags & B_ERROR) {
1662                 /*
1663                  * An error terminates the chain, propogate the error back
1664                  * to the original bp
1665                  */
1666                 bp->b_flags |= B_ERROR;
1667                 bp->b_error = nbp->b_error;
1668                 bp->b_resid = bp->b_bcount - boffset +
1669                               (nbp->b_bcount - nbp->b_resid);
1670 #if SPEC_CHAIN_DEBUG & 1
1671                 devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p error %d bcount %d/%d\n",
1672                         bp, bp->b_error, bp->b_bcount,
1673                         bp->b_bcount - bp->b_resid);
1674 #endif
1675                 kfree(nbp, M_DEVBUF);
1676                 biodone(bio);
1677         } else if (nbp->b_resid) {
1678                 /*
1679                  * A short read or write terminates the chain
1680                  */
1681                 bp->b_error = nbp->b_error;
1682                 bp->b_resid = bp->b_bcount - boffset +
1683                               (nbp->b_bcount - nbp->b_resid);
1684 #if SPEC_CHAIN_DEBUG & 1
1685                 devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(1) bcount %d/%d\n",
1686                         bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
1687 #endif
1688                 kfree(nbp, M_DEVBUF);
1689                 biodone(bio);
1690         } else if (nbp->b_bcount != nbp->b_bufsize) {
1691                 /*
1692                  * A short read or write can also occur by truncating b_bcount
1693                  */
1694 #if SPEC_CHAIN_DEBUG & 1
1695                 devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(2) bcount %d/%d\n",
1696                         bp, nbp->b_bcount + boffset, bp->b_bcount);
1697 #endif
1698                 bp->b_error = 0;
1699                 bp->b_bcount = nbp->b_bcount + boffset;
1700                 bp->b_resid = nbp->b_resid;
1701                 kfree(nbp, M_DEVBUF);
1702                 biodone(bio);
1703         } else if (nbp->b_bcount + boffset == bp->b_bcount) {
1704                 /*
1705                  * No more data terminates the chain
1706                  */
1707 #if SPEC_CHAIN_DEBUG & 1
1708                 devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p finished bcount %d\n",
1709                         bp, bp->b_bcount);
1710 #endif
1711                 bp->b_error = 0;
1712                 bp->b_resid = 0;
1713                 kfree(nbp, M_DEVBUF);
1714                 biodone(bio);
1715         } else {
1716                 /*
1717                  * Continue the chain
1718                  */
1719                 boffset += nbp->b_bcount;
1720                 nbp->b_data = bp->b_data + boffset;
1721                 nbp->b_bcount = bp->b_bcount - boffset;
1722                 if (nbp->b_bcount > chunksize)
1723                         nbp->b_bcount = chunksize;
1724                 nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1725                 nbp->b_bio1.bio_offset = bio->bio_offset + boffset;
1726
1727 #if SPEC_CHAIN_DEBUG & 1
1728                 devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
1729                         bp, boffset, bp->b_bcount, nbp->b_bcount);
1730 #endif
1731
1732                 dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
1733         }
1734 }
1735
1736 /*
1737  * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
1738  */
1739 static int
1740 devfs_spec_freeblks(struct vop_freeblks_args *ap)
1741 {
1742         struct buf *bp;
1743
1744         /*
1745          * XXX: This assumes that strategy does the deed right away.
1746          * XXX: this may not be TRTTD.
1747          */
1748         KKASSERT(ap->a_vp->v_rdev != NULL);
1749         if ((dev_dflags(ap->a_vp->v_rdev) & D_CANFREE) == 0)
1750                 return (0);
1751         bp = geteblk(ap->a_length);
1752         bp->b_cmd = BUF_CMD_FREEBLKS;
1753         bp->b_bio1.bio_offset = ap->a_offset;
1754         bp->b_bcount = ap->a_length;
1755         dev_dstrategy(ap->a_vp->v_rdev, &bp->b_bio1);
1756         return (0);
1757 }
1758
1759 /*
1760  * Implement degenerate case where the block requested is the block
1761  * returned, and assume that the entire device is contiguous in regards
1762  * to the contiguous block range (runp and runb).
1763  *
1764  * spec_bmap(struct vnode *a_vp, off_t a_loffset,
1765  *           off_t *a_doffsetp, int *a_runp, int *a_runb)
1766  */
1767 static int
1768 devfs_spec_bmap(struct vop_bmap_args *ap)
1769 {
1770         if (ap->a_doffsetp != NULL)
1771                 *ap->a_doffsetp = ap->a_loffset;
1772         if (ap->a_runp != NULL)
1773                 *ap->a_runp = MAXBSIZE;
1774         if (ap->a_runb != NULL) {
1775                 if (ap->a_loffset < MAXBSIZE)
1776                         *ap->a_runb = (int)ap->a_loffset;
1777                 else
1778                         *ap->a_runb = MAXBSIZE;
1779         }
1780         return (0);
1781 }
1782
1783
1784 /*
1785  * Special device advisory byte-level locks.
1786  *
1787  * spec_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
1788  *              struct flock *a_fl, int a_flags)
1789  */
1790 /* ARGSUSED */
1791 static int
1792 devfs_spec_advlock(struct vop_advlock_args *ap)
1793 {
1794         return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP);
1795 }
1796
1797 static void
1798 devfs_spec_getpages_iodone(struct bio *bio)
1799 {
1800         bio->bio_buf->b_cmd = BUF_CMD_DONE;
1801         wakeup(bio->bio_buf);
1802 }
1803
1804 /*
1805  * spec_getpages() - get pages associated with device vnode.
1806  *
1807  * Note that spec_read and spec_write do not use the buffer cache, so we
1808  * must fully implement getpages here.
1809  */
1810 static int
1811 devfs_spec_getpages(struct vop_getpages_args *ap)
1812 {
1813         vm_offset_t kva;
1814         int error;
1815         int i, pcount, size;
1816         struct buf *bp;
1817         vm_page_t m;
1818         vm_ooffset_t offset;
1819         int toff, nextoff, nread;
1820         struct vnode *vp = ap->a_vp;
1821         int blksiz;
1822         int gotreqpage;
1823
1824         error = 0;
1825         pcount = round_page(ap->a_count) / PAGE_SIZE;
1826
1827         /*
1828          * Calculate the offset of the transfer and do sanity check.
1829          */
1830         offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
1831
1832         /*
1833          * Round up physical size for real devices.  We cannot round using
1834          * v_mount's block size data because v_mount has nothing to do with
1835          * the device.  i.e. it's usually '/dev'.  We need the physical block
1836          * size for the device itself.
1837          *
1838          * We can't use v_rdev->si_mountpoint because it only exists when the
1839          * block device is mounted.  However, we can use v_rdev.
1840          */
1841
1842         if (vn_isdisk(vp, NULL))
1843                 blksiz = vp->v_rdev->si_bsize_phys;
1844         else
1845                 blksiz = DEV_BSIZE;
1846
1847         size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
1848
1849         bp = getpbuf(NULL);
1850         kva = (vm_offset_t)bp->b_data;
1851
1852         /*
1853          * Map the pages to be read into the kva.
1854          */
1855         pmap_qenter(kva, ap->a_m, pcount);
1856
1857         /* Build a minimal buffer header. */
1858         bp->b_cmd = BUF_CMD_READ;
1859         bp->b_bcount = size;
1860         bp->b_resid = 0;
1861         bp->b_runningbufspace = size;
1862         if (size) {
1863                 runningbufspace += bp->b_runningbufspace;
1864                 ++runningbufcount;
1865         }
1866
1867         bp->b_bio1.bio_offset = offset;
1868         bp->b_bio1.bio_done = devfs_spec_getpages_iodone;
1869
1870         mycpu->gd_cnt.v_vnodein++;
1871         mycpu->gd_cnt.v_vnodepgsin += pcount;
1872
1873         /* Do the input. */
1874         vn_strategy(ap->a_vp, &bp->b_bio1);
1875
1876         crit_enter();
1877
1878         /* We definitely need to be at splbio here. */
1879         while (bp->b_cmd != BUF_CMD_DONE)
1880                 tsleep(bp, 0, "spread", 0);
1881
1882         crit_exit();
1883
1884         if (bp->b_flags & B_ERROR) {
1885                 if (bp->b_error)
1886                         error = bp->b_error;
1887                 else
1888                         error = EIO;
1889         }
1890
1891         /*
1892          * If EOF is encountered we must zero-extend the result in order
1893          * to ensure that the page does not contain garabge.  When no
1894          * error occurs, an early EOF is indicated if b_bcount got truncated.
1895          * b_resid is relative to b_bcount and should be 0, but some devices
1896          * might indicate an EOF with b_resid instead of truncating b_bcount.
1897          */
1898         nread = bp->b_bcount - bp->b_resid;
1899         if (nread < ap->a_count)
1900                 bzero((caddr_t)kva + nread, ap->a_count - nread);
1901         pmap_qremove(kva, pcount);
1902
1903         gotreqpage = 0;
1904         for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
1905                 nextoff = toff + PAGE_SIZE;
1906                 m = ap->a_m[i];
1907
1908                 m->flags &= ~PG_ZERO;
1909
1910                 if (nextoff <= nread) {
1911                         m->valid = VM_PAGE_BITS_ALL;
1912                         vm_page_undirty(m);
1913                 } else if (toff < nread) {
1914                         /*
1915                          * Since this is a VM request, we have to supply the
1916                          * unaligned offset to allow vm_page_set_validclean()
1917                          * to zero sub-DEV_BSIZE'd portions of the page.
1918                          */
1919                         vm_page_set_validclean(m, 0, nread - toff);
1920                 } else {
1921                         m->valid = 0;
1922                         vm_page_undirty(m);
1923                 }
1924
1925                 if (i != ap->a_reqpage) {
1926                         /*
1927                          * Just in case someone was asking for this page we
1928                          * now tell them that it is ok to use.
1929                          */
1930                         if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
1931                                 if (m->valid) {
1932                                         if (m->flags & PG_WANTED) {
1933                                                 vm_page_activate(m);
1934                                         } else {
1935                                                 vm_page_deactivate(m);
1936                                         }
1937                                         vm_page_wakeup(m);
1938                                 } else {
1939                                         vm_page_free(m);
1940                                 }
1941                         } else {
1942                                 vm_page_free(m);
1943                         }
1944                 } else if (m->valid) {
1945                         gotreqpage = 1;
1946                         /*
1947                          * Since this is a VM request, we need to make the
1948                          * entire page presentable by zeroing invalid sections.
1949                          */
1950                         if (m->valid != VM_PAGE_BITS_ALL)
1951                             vm_page_zero_invalid(m, FALSE);
1952                 }
1953         }
1954         if (!gotreqpage) {
1955                 m = ap->a_m[ap->a_reqpage];
1956                 devfs_debug(DEVFS_DEBUG_WARNING,
1957             "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
1958                         devtoname(vp->v_rdev), error, bp, bp->b_vp);
1959                 devfs_debug(DEVFS_DEBUG_WARNING,
1960             "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
1961                     size, bp->b_resid, ap->a_count, m->valid);
1962                 devfs_debug(DEVFS_DEBUG_WARNING,
1963             "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
1964                     nread, ap->a_reqpage, (u_long)m->pindex, pcount);
1965                 /*
1966                  * Free the buffer header back to the swap buffer pool.
1967                  */
1968                 relpbuf(bp, NULL);
1969                 return VM_PAGER_ERROR;
1970         }
1971         /*
1972          * Free the buffer header back to the swap buffer pool.
1973          */
1974         relpbuf(bp, NULL);
1975         return VM_PAGER_OK;
1976 }
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016 static __inline
2017 int
2018 sequential_heuristic(struct uio *uio, struct file *fp)
2019 {
2020         /*
2021          * Sequential heuristic - detect sequential operation
2022          */
2023         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
2024             uio->uio_offset == fp->f_nextoff) {
2025                 int tmpseq = fp->f_seqcount;
2026                 /*
2027                  * XXX we assume that the filesystem block size is
2028                  * the default.  Not true, but still gives us a pretty
2029                  * good indicator of how sequential the read operations
2030                  * are.
2031                  */
2032                 tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
2033                 if (tmpseq > IO_SEQMAX)
2034                         tmpseq = IO_SEQMAX;
2035                 fp->f_seqcount = tmpseq;
2036                 return(fp->f_seqcount << IO_SEQSHIFT);
2037         }
2038
2039         /*
2040          * Not sequential, quick draw-down of seqcount
2041          */
2042         if (fp->f_seqcount > 1)
2043                 fp->f_seqcount = 1;
2044         else
2045                 fp->f_seqcount = 0;
2046         return(0);
2047 }