sys/vfs/union/union_vnops.c

   1 /*
   2  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
   3  * Copyright (c) 1992, 1993, 1994, 1995
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Jan-Simon Pendry.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  *
  33  *      @(#)union_vnops.c       8.32 (Berkeley) 6/23/95
  34  * $FreeBSD: src/sys/miscfs/union/union_vnops.c,v 1.72 1999/12/15 23:02:14 eivind Exp $
  35  * $DragonFly: src/sys/vfs/union/union_vnops.c,v 1.39 2007/11/20 21:03:51 dillon Exp $
  36  */
  37
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/proc.h>
  41 #include <sys/fcntl.h>
  42 #include <sys/stat.h>
  43 #include <sys/kernel.h>
  44 #include <sys/vnode.h>
  45 #include <sys/mount.h>
  46 #include <sys/namei.h>
  47 #include <sys/malloc.h>
  48 #include <sys/buf.h>
  49 #include <sys/lock.h>
  50 #include <sys/sysctl.h>
  51 #include "union.h"
  52
  53 #include <vm/vm.h>
  54 #include <vm/vnode_pager.h>
  55
  56 #include <vm/vm_page.h>
  57 #include <vm/vm_object.h>
  58
/*
 * Integer knob exported through sysctl under the _vfs node.  It is
 * writable (CTLFLAG_RW) only when UDEBUG_ENABLED is non-zero; otherwise
 * it is exported read-only (CTLFLAG_RD).
 * NOTE(review): presumably consulted by the UDEBUG() macro -- confirm in
 * union.h.
 */
int uniondebug = 0;

#if UDEBUG_ENABLED
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
#else
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
#endif
  66
/*
 * Forward declarations for the file-local vnode operations and the
 * union_lookup1() helper.
 *
 * NOTE(review): union_write is declared with struct vop_read_args rather
 * than a write-specific argument structure -- confirm against its
 * definition before changing either.
 */
static int      union_access (struct vop_access_args *ap);
static int      union_advlock (struct vop_advlock_args *ap);
static int      union_bmap (struct vop_bmap_args *ap);
static int      union_close (struct vop_close_args *ap);
static int      union_create (struct vop_old_create_args *ap);
static int      union_fsync (struct vop_fsync_args *ap);
static int      union_getattr (struct vop_getattr_args *ap);
static int      union_inactive (struct vop_inactive_args *ap);
static int      union_ioctl (struct vop_ioctl_args *ap);
static int      union_link (struct vop_old_link_args *ap);
static int      union_lookup (struct vop_old_lookup_args *ap);
static int      union_lookup1 (struct vnode *udvp, struct vnode **dvp,
                                   struct vnode **vpp,
                                   struct componentname *cnp);
static int      union_mkdir (struct vop_old_mkdir_args *ap);
static int      union_mknod (struct vop_old_mknod_args *ap);
static int      union_mmap (struct vop_mmap_args *ap);
static int      union_open (struct vop_open_args *ap);
static int      union_pathconf (struct vop_pathconf_args *ap);
static int      union_print (struct vop_print_args *ap);
static int      union_read (struct vop_read_args *ap);
static int      union_readdir (struct vop_readdir_args *ap);
static int      union_readlink (struct vop_readlink_args *ap);
static int      union_reclaim (struct vop_reclaim_args *ap);
static int      union_remove (struct vop_old_remove_args *ap);
static int      union_rename (struct vop_old_rename_args *ap);
static int      union_rmdir (struct vop_old_rmdir_args *ap);
static int      union_poll (struct vop_poll_args *ap);
static int      union_setattr (struct vop_setattr_args *ap);
static int      union_strategy (struct vop_strategy_args *ap);
static int      union_getpages (struct vop_getpages_args *ap);
static int      union_putpages (struct vop_putpages_args *ap);
static int      union_symlink (struct vop_old_symlink_args *ap);
static int      union_whiteout (struct vop_old_whiteout_args *ap);
static int      union_write (struct vop_read_args *ap);
 102
 103 static __inline
 104 struct vnode *
 105 union_lock_upper(struct union_node *un, struct thread *td)
 106 {
 107         struct vnode *uppervp;
 108
 109         if ((uppervp = un->un_uppervp) != NULL) {
 110                 vref(uppervp);
 111                 vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
 112         }
 113         KASSERT((uppervp == NULL || VREFCNT(uppervp) > 0),
 114                 ("uppervp usecount is 0"));
 115         return(uppervp);
 116 }
 117
 118 static __inline
 119 struct vnode *
 120 union_ref_upper(struct union_node *un)
 121 {
 122         struct vnode *uppervp;
 123
 124         if ((uppervp = un->un_uppervp) != NULL) {
 125                 vref(uppervp);
 126                 if (uppervp->v_flag & VRECLAIMED) {
 127                         vrele(uppervp);
 128                         return (NULLVP);
 129                 }
 130         }
 131         return (uppervp);
 132 }
 133
 134 static __inline
 135 void
 136 union_unlock_upper(struct vnode *uppervp, struct thread *td)
 137 {
 138         vput(uppervp);
 139 }
 140
 141 static __inline
 142 struct vnode *
 143 union_lock_other(struct union_node *un, struct thread *td)
 144 {
 145         struct vnode *vp;
 146
 147         if (un->un_uppervp != NULL) {
 148                 vp = union_lock_upper(un, td);
 149         } else if ((vp = un->un_lowervp) != NULL) {
 150                 vref(vp);
 151                 vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
 152         }
 153         return(vp);
 154 }
 155
 156 static __inline
 157 void
 158 union_unlock_other(struct vnode *vp, struct thread *td)
 159 {
 160         vput(vp);
 161 }
 162
/*
 *      union_lookup1:
 *
 *      Perform a single-layer lookup of cnp starting at *pdvp, stepping
 *      across mount points in both directions as required.
 *
 *      udvp    must be exclusively locked on call and will remain
 *              exclusively locked on return.  This is the mount point
 *              for our filesystem.
 *
 *      pdvp    Points at our base directory, locked and referenced.
 *              The passed dvp will be dereferenced and unlocked on return
 *              and a new dvp will be returned which is locked and
 *              referenced in the same variable.
 *
 *      vpp     is filled in with the result if no error occurred,
 *              locked and ref'd.
 *
 *              If an error is returned, *vpp is set to NULLVP.  If no
 *              error occurs, *vpp is returned with a reference and an
 *              exclusive lock.
 *
 *      Returns 0 on success, otherwise the error from VOP_LOOKUP() or
 *      VFS_ROOT().
 */

static int
union_lookup1(struct vnode *udvp, struct vnode **pdvp, struct vnode **vpp,
              struct componentname *cnp)
{
        int error;
        struct thread *td = cnp->cn_td;
        struct vnode *dvp = *pdvp;
        struct vnode *tdvp;
        struct mount *mp;

        /*
         * If stepping up the directory tree, check for going
         * back across the mount point, in which case do what
         * lookup would do by stepping back down the mount
         * hierarchy.
         */
        if (cnp->cn_flags & CNP_ISDOTDOT) {
                while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
                        /*
                         * Don't do the NOCROSSMOUNT check
                         * at this level.  By definition,
                         * union fs deals with namespaces, not
                         * filesystems.
                         */
                        tdvp = dvp;
                        dvp = dvp->v_mount->mnt_vnodecovered;
                        /* Ref the covered vnode before releasing the root. */
                        vref(dvp);
                        vput(tdvp);
                        vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
                }
        }

        /*
         * Set return dvp to be the upperdvp 'parent directory.
         */
        *pdvp = dvp;

        /*
         * If the VOP_LOOKUP call generates an error, tdvp is invalid and no
         * changes will have been made to dvp, so we are set to return.
         */

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
        if (error) {
                UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
                *vpp = NULL;
                return (error);
        }

        /*
         * The parent directory will have been unlocked, unless lookup
         * found the last component or if dvp == tdvp (tdvp must be locked).
         *
         * We want our dvp to remain locked and ref'd.  We also want tdvp
         * to remain locked and ref'd.
         */
        UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));

#if 0
        if (dvp != tdvp && (cnp->cn_flags & CNP_XXXISLASTCN) == 0)
                vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
#endif

        /*
         * Lastly check if the current node is a mount point in
         * which case walk up the mount hierarchy making sure not to
         * bump into the root of the mount tree (ie. dvp != udvp).
         *
         * We use dvp as a temporary variable here, it is no longer related
         * to the dvp above.  However, we have to ensure that both *pdvp and
         * tdvp are locked on return.
         */

        dvp = tdvp;
        while (
            dvp != udvp &&
            (dvp->v_type == VDIR) &&
            (mp = dvp->v_mountedhere)
        ) {
                int relock_pdvp = 0;

                /* Could not busy the mount: re-evaluate the loop test. */
                if (vfs_busy(mp, 0))
                        continue;

                /*
                 * dvp is about to be vput().  If it is also the parent
                 * directory being returned, remember to re-lock *pdvp
                 * once the mount's root has been obtained.
                 */
                if (dvp == *pdvp)
                        relock_pdvp = 1;
                vput(dvp);
                dvp = NULL;
                error = VFS_ROOT(mp, &dvp);

                vfs_unbusy(mp);

                if (relock_pdvp)
                        vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY);

                if (error) {
                        *vpp = NULL;
                        return (error);
                }
        }
        *vpp = dvp;
        return (0);
}
 286
 287 /*
 288  * union_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 289  *              struct componentname *a_cnp)
 290  */
 291 static int
 292 union_lookup(struct vop_old_lookup_args *ap)
 293 {
 294         int error;
 295         int uerror, lerror;
 296         struct vnode *uppervp, *lowervp;
 297         struct vnode *upperdvp, *lowerdvp;
 298         struct vnode *dvp = ap->a_dvp;          /* starting dir */
 299         struct union_node *dun = VTOUNION(dvp); /* associated union node */
 300         struct componentname *cnp = ap->a_cnp;
 301         struct thread *td = cnp->cn_td;
 302         int lockparent = cnp->cn_flags & CNP_LOCKPARENT;
 303         struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
 304         struct ucred *saved_cred = NULL;
 305         int iswhiteout;
 306         struct vattr va;
 307
 308         *ap->a_vpp = NULLVP;
 309
 310         /*
 311          * Disallow write attemps to the filesystem mounted read-only.
 312          */
 313         if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 314             (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
 315                 return (EROFS);
 316         }
 317
 318         /*
 319          * For any lookup's we do, always return with the parent locked
 320          */
 321         cnp->cn_flags |= CNP_LOCKPARENT;
 322
 323         lowerdvp = dun->un_lowervp;
 324         uppervp = NULLVP;
 325         lowervp = NULLVP;
 326         iswhiteout = 0;
 327
 328         uerror = ENOENT;
 329         lerror = ENOENT;
 330
 331         /*
 332          * Get a private lock on uppervp and a reference, effectively
 333          * taking it out of the union_node's control.
 334          *
 335          * We must lock upperdvp while holding our lock on dvp
 336          * to avoid a deadlock.
 337          */
 338         upperdvp = union_lock_upper(dun, td);
 339
 340         /*
 341          * do the lookup in the upper level.
 342          * if that level comsumes additional pathnames,
 343          * then assume that something special is going
 344          * on and just return that vnode.
 345          */
 346         if (upperdvp != NULLVP) {
 347                 /*
 348                  * We do not have to worry about the DOTDOT case, we've
 349                  * already unlocked dvp.
 350                  */
 351                 UDEBUG(("A %p\n", upperdvp));
 352
 353                 /*
 354                  * Do the lookup.   We must supply a locked and referenced
 355                  * upperdvp to the function and will get a new locked and
 356                  * referenced upperdvp back with the old having been
 357                  * dereferenced.
 358                  *
 359                  * If an error is returned, uppervp will be NULLVP.  If no
 360                  * error occurs, uppervp will be the locked and referenced
 361                  * return vnode or possibly NULL, depending on what is being
 362                  * requested.  It is possible that the returned uppervp
 363                  * will be the same as upperdvp.
 364                  */
 365                 uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
 366                 UDEBUG((
 367                     "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
 368                     uerror,
 369                     upperdvp,
 370                     VREFCNT(upperdvp),
 371                     vn_islocked(upperdvp),
 372                     uppervp,
 373                     (uppervp ? VREFCNT(uppervp) : -99),
 374                     (uppervp ? vn_islocked(uppervp) : -99)
 375                 ));
 376
 377                 /*
 378                  * Disallow write attemps to the filesystem mounted read-only.
 379                  */
 380                 if (uerror == EJUSTRETURN &&
 381                     (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 382                     (cnp->cn_nameiop == NAMEI_CREATE || cnp->cn_nameiop == NAMEI_RENAME)) {
 383                         error = EROFS;
 384                         goto out;
 385                 }
 386
 387                 /*
 388                  * Special case.  If cn_consume != 0 skip out.  The result
 389                  * of the lookup is transfered to our return variable.  If
 390                  * an error occured we have to throw away the results.
 391                  */
 392
 393                 if (cnp->cn_consume != 0) {
 394                         if ((error = uerror) == 0) {
 395                                 *ap->a_vpp = uppervp;
 396                                 uppervp = NULL;
 397                         }
 398                         goto out;
 399                 }
 400
 401                 /*
 402                  * Calculate whiteout, fall through
 403                  */
 404
 405                 if (uerror == ENOENT || uerror == EJUSTRETURN) {
 406                         if (cnp->cn_flags & CNP_ISWHITEOUT) {
 407                                 iswhiteout = 1;
 408                         } else if (lowerdvp != NULLVP) {
 409                                 int terror;
 410
 411                                 terror = VOP_GETATTR(upperdvp, &va);
 412                                 if (terror == 0 && (va.va_flags & OPAQUE))
 413                                         iswhiteout = 1;
 414                         }
 415                 }
 416         }
 417
 418         /*
 419          * in a similar way to the upper layer, do the lookup
 420          * in the lower layer.   this time, if there is some
 421          * component magic going on, then vput whatever we got
 422          * back from the upper layer and return the lower vnode
 423          * instead.
 424          */
 425
 426         if (lowerdvp != NULLVP && !iswhiteout) {
 427                 int nameiop;
 428
 429                 UDEBUG(("B %p\n", lowerdvp));
 430
 431                 /*
 432                  * Force only LOOKUPs on the lower node, since
 433                  * we won't be making changes to it anyway.
 434                  */
 435                 nameiop = cnp->cn_nameiop;
 436                 cnp->cn_nameiop = NAMEI_LOOKUP;
 437                 if (um->um_op == UNMNT_BELOW) {
 438                         saved_cred = cnp->cn_cred;
 439                         cnp->cn_cred = um->um_cred;
 440                 }
 441
 442                 /*
 443                  * We shouldn't have to worry about locking interactions
 444                  * between the lower layer and our union layer (w.r.t.
 445                  * `..' processing) because we don't futz with lowervp
 446                  * locks in the union-node instantiation code path.
 447                  *
 448                  * union_lookup1() requires lowervp to be locked on entry,
 449                  * and it will be unlocked on return.  The ref count will
 450                  * not change.  On return lowervp doesn't represent anything
 451                  * to us so we NULL it out.
 452                  */
 453                 vref(lowerdvp);
 454                 vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
 455                 lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
 456                 if (lowerdvp == lowervp)
 457                         vrele(lowerdvp);
 458                 else
 459                         vput(lowerdvp);
 460                 lowerdvp = NULL;        /* lowerdvp invalid after vput */
 461
 462                 if (um->um_op == UNMNT_BELOW)
 463                         cnp->cn_cred = saved_cred;
 464                 cnp->cn_nameiop = nameiop;
 465
 466                 if (cnp->cn_consume != 0 || lerror == EACCES) {
 467                         if ((error = lerror) == 0) {
 468                                 *ap->a_vpp = lowervp;
 469                                 lowervp = NULL;
 470                         }
 471                         goto out;
 472                 }
 473         } else {
 474                 UDEBUG(("C %p\n", lowerdvp));
 475                 if ((cnp->cn_flags & CNP_ISDOTDOT) && dun->un_pvp != NULLVP) {
 476                         if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
 477                                 vref(lowervp);
 478                                 vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
 479                                 lerror = 0;
 480                         }
 481                 }
 482         }
 483
 484         /*
 485          * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
 486          *
 487          * 1. If both layers returned an error, select the upper layer.
 488          *
 489          * 2. If the upper layer faile and the bottom layer succeeded,
 490          *    two subcases occur:
 491          *
 492          *      a.  The bottom vnode is not a directory, in which case
 493          *          just return a new union vnode referencing an
 494          *          empty top layer and the existing bottom layer.
 495          *
 496          *      b.  The button vnode is a directory, in which case
 497          *          create a new directory in the top layer and
 498          *          and fall through to case 3.
 499          *
 500          * 3. If the top layer succeeded then return a new union
 501          *    vnode referencing whatever the new top layer and
 502          *    whatever the bottom layer returned.
 503          */
 504
 505         /* case 1. */
 506         if ((uerror != 0) && (lerror != 0)) {
 507                 error = uerror;
 508                 goto out;
 509         }
 510
 511         /* case 2. */
 512         if (uerror != 0 /* && (lerror == 0) */ ) {
 513                 if (lowervp->v_type == VDIR) { /* case 2b. */
 514                         KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
 515                         /*
 516                          * oops, uppervp has a problem, we may have to shadow.
 517                          */
 518                         uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
 519                         if (uerror) {
 520                                 error = uerror;
 521                                 goto out;
 522                         }
 523                 }
 524         }
 525
 526         /*
 527          * Must call union_allocvp with both the upper and lower vnodes
 528          * referenced and the upper vnode locked.   ap->a_vpp is returned
 529          * referenced and locked.  lowervp, uppervp, and upperdvp are
 530          * absorbed by union_allocvp() whether it succeeds or fails.
 531          *
 532          * upperdvp is the parent directory of uppervp which may be
 533          * different, depending on the path, from dvp->un_uppervp.  That's
 534          * why it is a separate argument.  Note that it must be unlocked.
 535          *
 536          * dvp must be locked on entry to the call and will be locked on
 537          * return.
 538          */
 539
 540         if (uppervp && uppervp != upperdvp)
 541                 vn_unlock(uppervp);
 542         if (lowervp)
 543                 vn_unlock(lowervp);
 544         if (upperdvp)
 545                 vn_unlock(upperdvp);
 546
 547         error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
 548                               uppervp, lowervp, 1);
 549
 550         UDEBUG(("Create %p = %p %p refs=%d\n",
 551                 *ap->a_vpp, uppervp, lowervp,
 552                 (*ap->a_vpp) ? (VREFCNT(*ap->a_vpp)) : -99));
 553
 554         uppervp = NULL;
 555         upperdvp = NULL;
 556         lowervp = NULL;
 557
 558         /*
 559          *      Termination Code
 560          *
 561          *      - put away any extra junk laying around.  Note that lowervp
 562          *        (if not NULL) will never be the same as *ap->a_vp and
 563          *        neither will uppervp, because when we set that state we
 564          *        NULL-out lowervp or uppervp.  On the otherhand, upperdvp
 565          *        may match uppervp or *ap->a_vpp.
 566          *
 567          *      - relock/unlock dvp if appropriate.
 568          */
 569
 570 out:
 571         if (upperdvp) {
 572                 if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
 573                         vrele(upperdvp);
 574                 else
 575                         vput(upperdvp);
 576         }
 577
 578         if (uppervp)
 579                 vput(uppervp);
 580
 581         if (lowervp)
 582                 vput(lowervp);
 583
 584         /*
 585          * Restore LOCKPARENT state
 586          */
 587
 588         if (!lockparent)
 589                 cnp->cn_flags &= ~CNP_LOCKPARENT;
 590
 591         UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
 592                 ((*ap->a_vpp) ? (*ap->a_vpp)->v_refcnt : -99),
 593                 lowervp, uppervp));
 594
 595         /*
 596          * dvp lock state, determine whether to relock dvp.  dvp is expected
 597          * to be locked on return if:
 598          *
 599          *      - there was an error (except not EJUSTRETURN), or
 600          *      - we hit the last component and lockparent is true
 601          *
 602          * dvp_is_locked is the current state of the dvp lock, not counting
 603          * the possibility that *ap->a_vpp == dvp (in which case it is locked
 604          * anyway).  Note that *ap->a_vpp == dvp only if no error occured.
 605          */
 606
 607         if (*ap->a_vpp != dvp) {
 608                 if ((error == 0 || error == EJUSTRETURN) && !lockparent) {
 609                         vn_unlock(dvp);
 610                 }
 611         }
 612
 613         /*
 614          * Diagnostics
 615          */
 616
 617 #ifdef DIAGNOSTIC
 618         if (cnp->cn_namelen == 1 &&
 619             cnp->cn_nameptr[0] == '.' &&
 620             *ap->a_vpp != dvp) {
 621                 panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp);
 622         }
 623 #endif
 624
 625         return (error);
 626 }
 627
 628 /*
 629  *      union_create:
 630  *
 631  * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
 632  * locked if no error occurs, otherwise it is garbage.
 633  *
 634  * union_create(struct vnode *a_dvp, struct vnode **a_vpp,
 635  *              struct componentname *a_cnp, struct vattr *a_vap)
 636  */
 637 static int
 638 union_create(struct vop_old_create_args *ap)
 639 {
 640         struct union_node *dun = VTOUNION(ap->a_dvp);
 641         struct componentname *cnp = ap->a_cnp;
 642         struct thread *td = cnp->cn_td;
 643         struct vnode *dvp;
 644         int error = EROFS;
 645
 646         if ((dvp = union_lock_upper(dun, td)) != NULL) {
 647                 struct vnode *vp;
 648                 struct mount *mp;
 649
 650                 error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
 651                 if (error == 0) {
 652                         mp = ap->a_dvp->v_mount;
 653                         vn_unlock(vp);
 654                         UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n",
 655                                 vp, vp->v_refcnt));
 656                         error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
 657                                 cnp, vp, NULLVP, 1);
 658                         UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n",
 659                                 *ap->a_vpp, vp->v_refcnt));
 660                 }
 661                 union_unlock_upper(dvp, td);
 662         }
 663         return (error);
 664 }
 665
 666 /*
 667  * union_whiteout(struct vnode *a_dvp, struct componentname *a_cnp,
 668  *                int a_flags)
 669  */
 670 static int
 671 union_whiteout(struct vop_old_whiteout_args *ap)
 672 {
 673         struct union_node *un = VTOUNION(ap->a_dvp);
 674         struct componentname *cnp = ap->a_cnp;
 675         struct vnode *uppervp;
 676         int error = EOPNOTSUPP;
 677
 678         if ((uppervp = union_lock_upper(un, cnp->cn_td)) != NULLVP) {
 679                 error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
 680                 union_unlock_upper(uppervp, cnp->cn_td);
 681         }
 682         return(error);
 683 }
 684
 685 /*
 686  *      union_mknod:
 687  *
 688  *      a_dvp is locked on entry and should remain locked on return.
 689  *      a_vpp is garbagre whether an error occurs or not.
 690  *
 691  * union_mknod(struct vnode *a_dvp, struct vnode **a_vpp,
 692  *              struct componentname *a_cnp, struct vattr *a_vap)
 693  */
 694 static int
 695 union_mknod(struct vop_old_mknod_args *ap)
 696 {
 697         struct union_node *dun = VTOUNION(ap->a_dvp);
 698         struct componentname *cnp = ap->a_cnp;
 699         struct vnode *dvp;
 700         int error = EROFS;
 701
 702         if ((dvp = union_lock_upper(dun, cnp->cn_td)) != NULL) {
 703                 error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
 704                 union_unlock_upper(dvp, cnp->cn_td);
 705         }
 706         return (error);
 707 }
 708
 709 /*
 710  *      union_open:
 711  *
 712  *      run open VOP.  When opening the underlying vnode we have to mimic
 713  *      vn_open.  What we *really* need to do to avoid screwups if the
 714  *      open semantics change is to call vn_open().  For example, ufs blows
 715  *      up if you open a file but do not vmio it prior to writing.
 716  *
 717  * union_open(struct vnode *a_vp, int a_mode,
 718  *            struct ucred *a_cred, struct thread *a_td)
 719  */
 720 static int
 721 union_open(struct vop_open_args *ap)
 722 {
 723         struct union_node *un = VTOUNION(ap->a_vp);
 724         struct vnode *tvp;
 725         int mode = ap->a_mode;
 726         struct ucred *cred = ap->a_cred;
 727         struct thread *td = ap->a_td;
 728         int error = 0;
 729         int tvpisupper = 1;
 730
 731         /*
 732          * If there is an existing upper vp then simply open that.
 733          * The upper vp takes precedence over the lower vp.  When opening
 734          * a lower vp for writing copy it to the uppervp and then open the
 735          * uppervp.
 736          *
 737          * At the end of this section tvp will be left locked.
 738          */
 739         if ((tvp = union_lock_upper(un, td)) == NULLVP) {
 740                 /*
 741                  * If the lower vnode is being opened for writing, then
 742                  * copy the file contents to the upper vnode and open that,
 743                  * otherwise can simply open the lower vnode.
 744                  */
 745                 tvp = un->un_lowervp;
 746                 if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
 747                         int docopy = !(mode & O_TRUNC);
 748                         error = union_copyup(un, docopy, cred, td);
 749                         tvp = union_lock_upper(un, td);
 750                 } else {
 751                         un->un_openl++;
 752                         vref(tvp);
 753                         vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
 754                         tvpisupper = 0;
 755                 }
 756         }
 757
 758         /*
 759          * We are holding the correct vnode, open it.  Note
 760          * that in DragonFly, VOP_OPEN is responsible for associating
 761          * a VM object with the vnode if the vnode is mappable or the
 762          * underlying filesystem uses buffer cache calls on it.
 763          */
 764         if (error == 0)
 765                 error = VOP_OPEN(tvp, mode, cred, NULL);
 766
 767         /*
 768          * Release any locks held
 769          */
 770         if (tvpisupper) {
 771                 if (tvp)
 772                         union_unlock_upper(tvp, td);
 773         } else {
 774                 vput(tvp);
 775         }
 776         return (error);
 777 }
 778
 779 /*
 780  *      union_close:
 781  *
 782  *      It is unclear whether a_vp is passed locked or unlocked.  Whatever
 783  *      the case we do not change it.
 784  *
 785  * union_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred,
 786  *              struct thread *a_td)
 787  */
 788 static int
 789 union_close(struct vop_close_args *ap)
 790 {
 791         struct union_node *un = VTOUNION(ap->a_vp);
 792         struct vnode *vp;
 793
 794         vn_lock(vp, LK_UPGRADE | LK_RETRY);
 795         if ((vp = un->un_uppervp) == NULLVP) {
 796 #ifdef UNION_DIAGNOSTIC
 797                 if (un->un_openl <= 0)
 798                         panic("union: un_openl cnt");
 799 #endif
 800                 --un->un_openl;
 801                 vp = un->un_lowervp;
 802         }
 803         ap->a_head.a_ops = *vp->v_ops;
 804         ap->a_vp = vp;
 805         return(vop_close_ap(ap));
 806 }
 807
 808 /*
 809  * Check access permission on the union vnode.
 810  * The access check being enforced is to check
 811  * against both the underlying vnode, and any
 812  * copied vnode.  This ensures that no additional
 813  * file permissions are given away simply because
 814  * the user caused an implicit file copy.
 815  *
 816  * union_access(struct vnode *a_vp, int a_mode,
 817  *              struct ucred *a_cred, struct thread *a_td)
 818  */
 819 static int
 820 union_access(struct vop_access_args *ap)
 821 {
 822         struct union_node *un = VTOUNION(ap->a_vp);
 823         struct thread *td = ap->a_td;
 824         int error = EACCES;
 825         struct vnode *vp;
 826
 827         /*
 828          * Disallow write attempts on filesystems mounted read-only.
 829          */
 830         if ((ap->a_mode & VWRITE) &&
 831             (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
 832                 switch (ap->a_vp->v_type) {
 833                 case VREG:
 834                 case VDIR:
 835                 case VLNK:
 836                         return (EROFS);
 837                 default:
 838                         break;
 839                 }
 840         }
 841
 842         if ((vp = union_lock_upper(un, td)) != NULLVP) {
 843                 ap->a_head.a_ops = *vp->v_ops;
 844                 ap->a_vp = vp;
 845                 error = vop_access_ap(ap);
 846                 union_unlock_upper(vp, td);
 847                 return(error);
 848         }
 849
 850         if ((vp = un->un_lowervp) != NULLVP) {
 851                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 852                 ap->a_head.a_ops = *vp->v_ops;
 853                 ap->a_vp = vp;
 854
 855                 /*
 856                  * Remove VWRITE from a_mode if our mount point is RW, because
 857                  * we want to allow writes and lowervp may be read-only.
 858                  */
 859                 if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
 860                         ap->a_mode &= ~VWRITE;
 861
 862                 error = vop_access_ap(ap);
 863                 if (error == 0) {
 864                         struct union_mount *um;
 865
 866                         um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
 867
 868                         if (um->um_op == UNMNT_BELOW) {
 869                                 ap->a_cred = um->um_cred;
 870                                 error = vop_access_ap(ap);
 871                         }
 872                 }
 873                 vn_unlock(vp);
 874         }
 875         return(error);
 876 }
 877
 878 /*
 879  * We handle getattr only to change the fsid and
 880  * track object sizes
 881  *
 882  * It's not clear whether VOP_GETATTR is to be
 883  * called with the vnode locked or not.  stat() calls
 884  * it with (vp) locked, and fstat calls it with
 885  * (vp) unlocked.
 886  *
 887  * Because of this we cannot use our normal locking functions
 888  * if we do not intend to lock the main a_vp node.  At the moment
 889  * we are running without any specific locking at all, but beware
 890  * to any programmer that care must be taken if locking is added
 891  * to this function.
 892  *
 893  * union_getattr(struct vnode *a_vp, struct vattr *a_vap,
 894  *               struct ucred *a_cred, struct thread *a_td)
 895  */
 896 static int
 897 union_getattr(struct vop_getattr_args *ap)
 898 {
 899         int error;
 900         struct union_node *un = VTOUNION(ap->a_vp);
 901         struct vnode *vp;
 902         struct vattr *vap;
 903         struct vattr va;
 904
 905         /*
 906          * Some programs walk the filesystem hierarchy by counting
 907          * links to directories to avoid stat'ing all the time.
 908          * This means the link count on directories needs to be "correct".
 909          * The only way to do that is to call getattr on both layers
 910          * and fix up the link count.  The link count will not necessarily
 911          * be accurate but will be large enough to defeat the tree walkers.
 912          */
 913
 914         vap = ap->a_vap;
 915
 916         if ((vp = un->un_uppervp) != NULLVP) {
 917                 error = VOP_GETATTR(vp, vap);
 918                 if (error)
 919                         return (error);
 920                 /* XXX isn't this dangerouso without a lock? */
 921                 union_newsize(ap->a_vp, vap->va_size, VNOVAL);
 922         }
 923
 924         if (vp == NULLVP) {
 925                 vp = un->un_lowervp;
 926         } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
 927                 vp = un->un_lowervp;
 928                 vap = &va;
 929         } else {
 930                 vp = NULLVP;
 931         }
 932
 933         if (vp != NULLVP) {
 934                 error = VOP_GETATTR(vp, vap);
 935                 if (error)
 936                         return (error);
 937                 /* XXX isn't this dangerous without a lock? */
 938                 union_newsize(ap->a_vp, VNOVAL, vap->va_size);
 939         }
 940
 941         if ((vap != ap->a_vap) && (vap->va_type == VDIR))
 942                 ap->a_vap->va_nlink += vap->va_nlink;
 943         return (0);
 944 }
 945
 946 /*
 947  * union_setattr(struct vnode *a_vp, struct vattr *a_vap,
 948  *               struct ucred *a_cred, struct thread *a_td)
 949  */
 950 static int
 951 union_setattr(struct vop_setattr_args *ap)
 952 {
 953         struct union_node *un = VTOUNION(ap->a_vp);
 954         struct thread *td = ap->a_td;
 955         struct vattr *vap = ap->a_vap;
 956         struct vnode *uppervp;
 957         int error;
 958
 959         /*
 960          * Disallow write attempts on filesystems mounted read-only.
 961          */
 962         if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
 963             (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 964              vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 965              vap->va_mtime.tv_sec != VNOVAL ||
 966              vap->va_mode != (mode_t)VNOVAL)) {
 967                 return (EROFS);
 968         }
 969
 970         /*
 971          * Handle case of truncating lower object to zero size,
 972          * by creating a zero length upper object.  This is to
 973          * handle the case of open with O_TRUNC and O_CREAT.
 974          */
 975         if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
 976                 error = union_copyup(un, (ap->a_vap->va_size != 0),
 977                             ap->a_cred, ap->a_td);
 978                 if (error)
 979                         return (error);
 980         }
 981
 982         /*
 983          * Try to set attributes in upper layer,
 984          * otherwise return read-only filesystem error.
 985          */
 986         error = EROFS;
 987         if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
 988                 error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred);
 989                 if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
 990                         union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
 991                 union_unlock_upper(uppervp, td);
 992         }
 993         return (error);
 994 }
 995
 996 /*
 997  *      union_getpages:
 998  */
 999
1000 static int
1001 union_getpages(struct vop_getpages_args *ap)
1002 {
1003         int r;
1004
1005         r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
1006                                          ap->a_count, ap->a_reqpage,
1007                                          ap->a_seqaccess);
1008         return(r);
1009 }
1010
1011 /*
1012  *      union_putpages:
1013  */
1014
1015 static int
1016 union_putpages(struct vop_putpages_args *ap)
1017 {
1018         int r;
1019
1020         r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
1021                 ap->a_sync, ap->a_rtvals);
1022         return(r);
1023 }
1024
1025 /*
1026  * union_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1027  *            struct ucred *a_cred)
1028  */
1029 static int
1030 union_read(struct vop_read_args *ap)
1031 {
1032         struct union_node *un = VTOUNION(ap->a_vp);
1033         struct thread *td = ap->a_uio->uio_td;
1034         struct vnode *uvp;
1035         int error;
1036
1037         uvp = union_lock_other(un, td);
1038         KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
1039
1040         if (ap->a_vp->v_flag & VOBJBUF)
1041                 union_vm_coherency(ap->a_vp, ap->a_uio, 0);
1042
1043         error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1044         union_unlock_other(uvp, td);
1045
1046         /*
1047          * XXX
1048          * perhaps the size of the underlying object has changed under
1049          * our feet.  take advantage of the offset information present
1050          * in the uio structure.
1051          */
1052         if (error == 0) {
1053                 struct union_node *un = VTOUNION(ap->a_vp);
1054                 off_t cur = ap->a_uio->uio_offset;
1055
1056                 if (uvp == un->un_uppervp) {
1057                         if (cur > un->un_uppersz)
1058                                 union_newsize(ap->a_vp, cur, VNOVAL);
1059                 } else {
1060                         if (cur > un->un_lowersz)
1061                                 union_newsize(ap->a_vp, VNOVAL, cur);
1062                 }
1063         }
1064         return (error);
1065 }
1066
1067 /*
1068  * union_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1069  *              struct ucred *a_cred)
1070  */
1071 static int
1072 union_write(struct vop_read_args *ap)
1073 {
1074         struct union_node *un = VTOUNION(ap->a_vp);
1075         struct thread *td = ap->a_uio->uio_td;
1076         struct vnode *uppervp;
1077         int error;
1078
1079         if ((uppervp = union_lock_upper(un, td)) == NULLVP)
1080                 panic("union: missing upper layer in write");
1081
1082         /*
1083          * Since our VM pages are associated with our vnode rather then
1084          * the real vnode, and since we do not run our reads and writes
1085          * through our own VM cache, we have a VM/VFS coherency problem.
1086          * We solve them by invalidating or flushing the associated VM
1087          * pages prior to allowing a normal read or write to occur.
1088          *
1089          * VM-backed writes (UIO_NOCOPY) have to be converted to normal
1090          * writes because we are not cache-coherent.  Normal writes need
1091          * to be made coherent with our VM-backing store, which we do by
1092          * first flushing any dirty VM pages associated with the write
1093          * range, and then destroying any clean VM pages associated with
1094          * the write range.
1095          */
1096
1097         if (ap->a_uio->uio_segflg == UIO_NOCOPY) {
1098                 ap->a_uio->uio_segflg = UIO_SYSSPACE;
1099         } else if (ap->a_vp->v_flag & VOBJBUF) {
1100                 union_vm_coherency(ap->a_vp, ap->a_uio, 1);
1101         }
1102
1103         error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1104
1105         /*
1106          * the size of the underlying object may be changed by the
1107          * write.
1108          */
1109         if (error == 0) {
1110                 off_t cur = ap->a_uio->uio_offset;
1111
1112                 if (cur > un->un_uppersz)
1113                         union_newsize(ap->a_vp, cur, VNOVAL);
1114         }
1115         union_unlock_upper(uppervp, td);
1116         return (error);
1117 }
1118
1119 /*
1120  * union_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data, int a_fflag,
1121  *              struct ucred *a_cred, struct thread *a_td)
1122  */
1123 static int
1124 union_ioctl(struct vop_ioctl_args *ap)
1125 {
1126         struct vnode *ovp = OTHERVP(ap->a_vp);
1127
1128         ap->a_head.a_ops = *ovp->v_ops;
1129         ap->a_vp = ovp;
1130         return(vop_ioctl_ap(ap));
1131 }
1132
1133 /*
1134  * union_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred,
1135  *            struct thread *a_td)
1136  */
1137 static int
1138 union_poll(struct vop_poll_args *ap)
1139 {
1140         struct vnode *ovp = OTHERVP(ap->a_vp);
1141
1142         ap->a_head.a_ops = *ovp->v_ops;
1143         ap->a_vp = ovp;
1144         return(vop_poll_ap(ap));
1145 }
1146
1147 /*
1148  * union_mmap(struct vnode *a_vp, int a_fflags, struct ucred *a_cred,
1149  *            struct thread *a_td)
1150  */
1151 static int
1152 union_mmap(struct vop_mmap_args *ap)
1153 {
1154         struct vnode *ovp = OTHERVP(ap->a_vp);
1155
1156         ap->a_head.a_ops = *ovp->v_ops;
1157         ap->a_vp = ovp;
1158         return (vop_mmap_ap(ap));
1159 }
1160
1161 /*
1162  * union_fsync(struct vnode *a_vp, struct ucred *a_cred, int a_waitfor,
1163  *              struct thread *a_td)
1164  */
1165 static int
1166 union_fsync(struct vop_fsync_args *ap)
1167 {
1168         int error = 0;
1169         struct thread *td = ap->a_td;
1170         struct vnode *targetvp;
1171         struct union_node *un = VTOUNION(ap->a_vp);
1172
1173         if ((targetvp = union_lock_other(un, td)) != NULLVP) {
1174                 error = VOP_FSYNC(targetvp, ap->a_waitfor, 0);
1175                 union_unlock_other(targetvp, td);
1176         }
1177
1178         return (error);
1179 }
1180
1181 /*
1182  *      union_remove:
1183  *
1184  *      Remove the specified cnp.  The dvp and vp are passed to us locked
1185  *      and must remain locked on return.
1186  *
1187  * union_remove(struct vnode *a_dvp, struct vnode *a_vp,
1188  *              struct componentname *a_cnp)
1189  */
1190 static int
1191 union_remove(struct vop_old_remove_args *ap)
1192 {
1193         struct union_node *dun = VTOUNION(ap->a_dvp);
1194         struct union_node *un = VTOUNION(ap->a_vp);
1195         struct componentname *cnp = ap->a_cnp;
1196         struct thread *td = cnp->cn_td;
1197         struct vnode *uppervp;
1198         struct vnode *upperdvp;
1199         int error;
1200
1201         if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1202                 panic("union remove: null upper vnode");
1203
1204         if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1205                 if (union_dowhiteout(un, cnp->cn_cred, td))
1206                         cnp->cn_flags |= CNP_DOWHITEOUT;
1207                 error = VOP_REMOVE(upperdvp, uppervp, cnp);
1208 #if 0
1209                 /* XXX */
1210                 if (!error)
1211                         union_removed_upper(un);
1212 #endif
1213                 union_unlock_upper(uppervp, td);
1214         } else {
1215                 error = union_mkwhiteout(
1216                             MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1217                             upperdvp, ap->a_cnp, un->un_path);
1218         }
1219         union_unlock_upper(upperdvp, td);
1220         return (error);
1221 }
1222
/*
 *      union_link:
 *
 *      Create a hard link to vp in the upper layer of directory tdvp.
 *
 *      tdvp will be locked on entry, vp will not be locked on entry.
 *      tdvp should remain locked on return and vp should remain unlocked
 *      on return.
 *
 *      If vp is a union vnode without an upper vnode it is copied up
 *      first so that there is an upper object to link to.  Returns the
 *      copy-up error if that fails, EROFS if the target directory has no
 *      upper vnode, otherwise the result of VOP_LINK() on the upper layer.
 *
 * union_link(struct vnode *a_tdvp, struct vnode *a_vp,
 *            struct componentname *a_cnp)
 */
static int
union_link(struct vop_old_link_args *ap)
{
        struct componentname *cnp = ap->a_cnp;
        struct thread *td = cnp->cn_td;
        struct union_node *dun = VTOUNION(ap->a_tdvp);
        struct vnode *vp;
        struct vnode *tdvp;
        int error = 0;

        /*
         * Differing ops vectors mean a_vp is not a vnode of the same
         * kind as a_tdvp, so it is passed down as-is.  Otherwise use its
         * upper vnode, creating it by copy-up if necessary.
         */
        if (ap->a_tdvp->v_ops != ap->a_vp->v_ops) {
                vp = ap->a_vp;
        } else {
                struct union_node *tun = VTOUNION(ap->a_vp);

                if (tun->un_uppervp == NULLVP) {
                        /* a_vp arrives unlocked; hold it across the copy-up. */
                        vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
#if 0
                        if (dun->un_uppervp == tun->un_dirvp) {
                                if (dun->un_flags & UN_ULOCK) {
                                        dun->un_flags &= ~UN_ULOCK;
                                        vn_unlock(dun->un_uppervp);
                                }
                        }
#endif
                        error = union_copyup(tun, 1, cnp->cn_cred, td);
#if 0
                        if (dun->un_uppervp == tun->un_dirvp) {
                                vn_lock(dun->un_uppervp,
                                        LK_EXCLUSIVE | LK_RETRY);
                                dun->un_flags |= UN_ULOCK;
                        }
#endif
                        vn_unlock(ap->a_vp);
                }
                /* Read after the copy-up so a newly created upper is seen. */
                vp = tun->un_uppervp;
        }

        if (error)
                return (error);

        /*
         * Make sure upper is locked, then unlock the union directory we were
         * called with to avoid a deadlock while we are calling VOP_LINK on
         * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
         * is expected to be locked on return.
         */

        if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
                return (EROFS);

        vn_unlock(ap->a_tdvp);  /* unlock calling node */
        error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */

        /*
         * We have to unlock tdvp prior to relocking our calling node in
         * order to avoid a deadlock.
         */
        union_unlock_upper(tdvp, td);
        vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY);
        return (error);
}
1295
/*
 * Rename within the upper layer.
 *
 * Each of the four vnodes handed in (source directory, source object,
 * target directory, optional target object) is replaced by its upper
 * vnode before VOP_RENAME() is called on the result.  A regular-file
 * source that exists only in the lower layer is copied up first; any
 * other lower-only source type fails with EXDEV, as does a source or
 * target directory without an upper vnode.
 *
 * On every path the references and locks received from the caller are
 * consumed: either by VOP_RENAME() or by the cleanup at "bad".
 *
 * union_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
 *              struct componentname *a_fcnp, struct vnode *a_tdvp,
 *              struct vnode *a_tvp, struct componentname *a_tcnp)
 */
static int
union_rename(struct vop_old_rename_args *ap)
{
        int error;
        struct vnode *fdvp = ap->a_fdvp;
        struct vnode *fvp = ap->a_fvp;
        struct vnode *tdvp = ap->a_tdvp;
        struct vnode *tvp = ap->a_tvp;

        /*
         * Figure out what fdvp to pass to our upper or lower vnode.  If we
         * replace the fdvp, release the original one and ref the new one.
         */

        if (fdvp->v_tag == VT_UNION) {  /* always true */
                struct union_node *un = VTOUNION(fdvp);
                if (un->un_uppervp == NULLVP) {
                        /*
                         * this should never happen in normal
                         * operation but might if there was
                         * a problem creating the top-level shadow
                         * directory.
                         */
                        error = EXDEV;
                        goto bad;
                }
                /* Swap our reference from the union vnode to its upper. */
                fdvp = un->un_uppervp;
                vref(fdvp);
                vrele(ap->a_fdvp);
        }

        /*
         * Figure out what fvp to pass to our upper or lower vnode.  If we
         * replace the fvp, release the original one and ref the new one.
         */

        if (fvp->v_tag == VT_UNION) {   /* always true */
                struct union_node *un = VTOUNION(fvp);
#if 0
                struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
#endif

                if (un->un_uppervp == NULLVP) {
                        switch(fvp->v_type) {
                        case VREG:
                                /* Lower-only regular file: copy it up first. */
                                vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY);
                                error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_td);
                                vn_unlock(un->un_vnode);
                                if (error)
                                        goto bad;
                                break;
                        case VDIR:
                                /*
                                 * XXX not yet.
                                 *
                                 * There is only one way to rename a directory
                                 * based in the lowervp, and that is to copy
                                 * the entire directory hierarchy.  Otherwise
                                 * it would not last across a reboot.
                                 */
#if 0
                                vrele(fvp);
                                fvp = NULL;
                                vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
                                error = union_mkshadow(um, fdvp,
                                            ap->a_fcnp, &un->un_uppervp);
                                vn_unlock(fdvp);
                                if (un->un_uppervp)
                                        vn_unlock(un->un_uppervp);
                                if (error)
                                        goto bad;
                                break;
#endif
                        /* With the block above disabled, VDIR lands here. */
                        default:
                                error = EXDEV;
                                goto bad;
                        }
                }

                /* A lower object exists under this name: request a whiteout. */
                if (un->un_lowervp != NULLVP)
                        ap->a_fcnp->cn_flags |= CNP_DOWHITEOUT;
                fvp = un->un_uppervp;
                vref(fvp);
                vrele(ap->a_fvp);
        }

        /*
         * Figure out what tdvp (destination directory) to pass to the
         * lower level.  If we replace it with uppervp, we need to vput the
         * old one.  The exclusive lock is transferred to what we will pass
         * down in the VOP_RENAME and we replace uppervp with a simple
         * reference.
         */

        if (tdvp->v_tag == VT_UNION) {
                struct union_node *un = VTOUNION(tdvp);

                if (un->un_uppervp == NULLVP) {
                        /*
                         * this should never happen in normal
                         * operation but might if there was
                         * a problem creating the top-level shadow
                         * directory.
                         */
                        error = EXDEV;
                        goto bad;
                }

                /*
                 * new tdvp is a lock and reference on uppervp, put away
                 * the old tdvp.
                 */
                tdvp = union_lock_upper(un, ap->a_tcnp->cn_td);
                vput(ap->a_tdvp);
        }

        /*
         * Figure out what tvp (destination file) to pass to the
         * lower level.
         *
         * If the uppervp file does not exist put away the (wrong)
         * file and change tvp to NULL.
         */

        if (tvp != NULLVP && tvp->v_tag == VT_UNION) {
                struct union_node *un = VTOUNION(tvp);

                tvp = union_lock_upper(un, ap->a_tcnp->cn_td);
                vput(ap->a_tvp);
                /* note: tvp may be NULL */
        }

        /*
         * VOP_RENAME releases/vputs prior to returning, so we have no
         * cleanup to do.
         */

        return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));

        /*
         * Error.  We still have to release / vput the various elements.
         * fdvp and fvp are dropped with vrele(), tdvp with vput(); tvp is
         * dropped with vput() unless it is the same vnode as tdvp, in
         * which case only its reference is released.
         */

bad:
        vrele(fdvp);
        if (fvp)
                vrele(fvp);
        vput(tdvp);
        if (tvp != NULLVP) {
                if (tvp != tdvp)
                        vput(tvp);
                else
                        vrele(tvp);
        }
        return (error);
}
1457
1458 /*
1459  * union_mkdir(struct vnode *a_dvp, struct vnode **a_vpp,
1460  *              struct componentname *a_cnp, struct vattr *a_vap)
1461  */
1462 static int
1463 union_mkdir(struct vop_old_mkdir_args *ap)
1464 {
1465         struct union_node *dun = VTOUNION(ap->a_dvp);
1466         struct componentname *cnp = ap->a_cnp;
1467         struct thread *td = cnp->cn_td;
1468         struct vnode *upperdvp;
1469         int error = EROFS;
1470
1471         if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
1472                 struct vnode *vp;
1473
1474                 error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
1475                 union_unlock_upper(upperdvp, td);
1476
1477                 if (error == 0) {
1478                         vn_unlock(vp);
1479                         UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n",
1480                                 vp, vp->v_refcnt));
1481                         error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
1482                                 ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
1483                         UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n",
1484                                 *ap->a_vpp, vp->v_refcnt));
1485                 }
1486         }
1487         return (error);
1488 }
1489
1490 /*
1491  * union_rmdir(struct vnode *a_dvp, struct vnode *a_vp,
1492  *              struct componentname *a_cnp)
1493  */
1494 static int
1495 union_rmdir(struct vop_old_rmdir_args *ap)
1496 {
1497         struct union_node *dun = VTOUNION(ap->a_dvp);
1498         struct union_node *un = VTOUNION(ap->a_vp);
1499         struct componentname *cnp = ap->a_cnp;
1500         struct thread *td = cnp->cn_td;
1501         struct vnode *upperdvp;
1502         struct vnode *uppervp;
1503         int error;
1504
1505         if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1506                 panic("union rmdir: null upper vnode");
1507
1508         if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1509                 if (union_dowhiteout(un, cnp->cn_cred, td))
1510                         cnp->cn_flags |= CNP_DOWHITEOUT;
1511                 error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
1512                 union_unlock_upper(uppervp, td);
1513         } else {
1514                 error = union_mkwhiteout(
1515                             MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1516                             dun->un_uppervp, ap->a_cnp, un->un_path);
1517         }
1518         union_unlock_upper(upperdvp, td);
1519         return (error);
1520 }
1521
1522 /*
1523  *      union_symlink:
1524  *
1525  *      dvp is locked on entry and remains locked on return.  a_vpp is garbage
1526  *      (unused).
1527  *
1528  * union_symlink(struct vnode *a_dvp, struct vnode **a_vpp,
1529  *              struct componentname *a_cnp, struct vattr *a_vap,
1530  *              char *a_target)
1531  */
1532 static int
1533 union_symlink(struct vop_old_symlink_args *ap)
1534 {
1535         struct union_node *dun = VTOUNION(ap->a_dvp);
1536         struct componentname *cnp = ap->a_cnp;
1537         struct thread *td = cnp->cn_td;
1538         struct vnode *dvp;
1539         int error = EROFS;
1540
1541         if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
1542                 error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1543                             ap->a_target);
1544                 union_unlock_upper(dvp, td);
1545         }
1546         return (error);
1547 }
1548
1549 /*
1550  * union_readdir works in concert with getdirentries and
1551  * readdir(3) to provide a list of entries in the unioned
1552  * directories.  getdirentries is responsible for walking
1553  * down the union stack.  readdir(3) is responsible for
1554  * eliminating duplicate names from the returned data stream.
1555  *
1556  * union_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
1557  *               int *a_eofflag, off_t *a_cookies, int a_ncookies)
1558  */
1559 static int
1560 union_readdir(struct vop_readdir_args *ap)
1561 {
1562         struct union_node *un = VTOUNION(ap->a_vp);
1563         struct thread *td = ap->a_uio->uio_td;
1564         struct vnode *uvp;
1565         int error = 0;
1566
1567         if ((uvp = union_ref_upper(un)) != NULLVP) {
1568                 ap->a_head.a_ops = *uvp->v_ops;
1569                 ap->a_vp = uvp;
1570                 error = vop_readdir_ap(ap);
1571                 vrele(uvp);
1572         }
1573         return(error);
1574 }
1575
1576 /*
1577  * union_readlink(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
1578  */
1579 static int
1580 union_readlink(struct vop_readlink_args *ap)
1581 {
1582         int error;
1583         struct union_node *un = VTOUNION(ap->a_vp);
1584         struct uio *uio = ap->a_uio;
1585         struct thread *td = uio->uio_td;
1586         struct vnode *vp;
1587
1588         vp = union_lock_other(un, td);
1589         KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
1590
1591         ap->a_head.a_ops = *vp->v_ops;
1592         ap->a_vp = vp;
1593         error = vop_readlink_ap(ap);
1594         union_unlock_other(vp, td);
1595
1596         return (error);
1597 }
1598
1599 /*
1600  *      union_inactive:
1601  *
1602  *      Called with the vnode locked.  We are expected to unlock the vnode.
1603  *
1604  * union_inactive(struct vnode *a_vp, struct thread *a_td)
1605  */
1606 static int
1607 union_inactive(struct vop_inactive_args *ap)
1608 {
1609         struct vnode *vp = ap->a_vp;
1610         /*struct thread *td = ap->a_td;*/
1611         struct union_node *un = VTOUNION(vp);
1612         struct vnode **vpp;
1613
1614         /*
1615          * Do nothing (and _don't_ bypass).
1616          * Wait to vrele lowervp until reclaim,
1617          * so that until then our union_node is in the
1618          * cache and reusable.
1619          *
1620          * NEEDSWORK: Someday, consider inactive'ing
1621          * the lowervp and then trying to reactivate it
1622          * with capabilities (v_id)
1623          * like they do in the name lookup cache code.
1624          * That's too much work for now.
1625          */
1626
1627         if (un->un_dircache != 0) {
1628                 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1629                         vrele(*vpp);
1630                 kfree (un->un_dircache, M_TEMP);
1631                 un->un_dircache = 0;
1632         }
1633
1634 #if 0
1635         if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
1636                 un->un_flags &= ~UN_ULOCK;
1637                 vn_unlock(un->un_uppervp);
1638         }
1639 #endif
1640
1641         if ((un->un_flags & UN_CACHED) == 0)
1642                 vgone_vxlocked(vp);
1643
1644         return (0);
1645 }
1646
1647 /*
1648  * union_reclaim(struct vnode *a_vp)
1649  */
1650 static int
1651 union_reclaim(struct vop_reclaim_args *ap)
1652 {
1653         union_freevp(ap->a_vp);
1654
1655         return (0);
1656 }
1657
/*
 *	union_bmap:
 *
 *	Block mapping is not supported on a union vnode; every request is
 *	refused with EOPNOTSUPP and the arguments are not examined.
 *
 *	Pushing the request through to the real vnode to reach the
 *	underlying device is not an option, because that would bypass data
 *	cached by the real vnode.  Handing back the 'real' vnode does not
 *	work either: for some reason it seems to blow up memory maps.
 *
 * union_bmap(struct vnode *a_vp, off_t a_loffset,
 *	      off_t *a_doffsetp, int *a_runp, int *a_runb)
 */
static int
union_bmap(struct vop_bmap_args *ap)
{
	return (EOPNOTSUPP);
}
1676
1677 /*
1678  * union_print(struct vnode *a_vp)
1679  */
1680 static int
1681 union_print(struct vop_print_args *ap)
1682 {
1683         struct vnode *vp = ap->a_vp;
1684
1685         kprintf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1686                         vp, UPPERVP(vp), LOWERVP(vp));
1687         if (UPPERVP(vp) != NULLVP)
1688                 vprint("union: upper", UPPERVP(vp));
1689         if (LOWERVP(vp) != NULLVP)
1690                 vprint("union: lower", LOWERVP(vp));
1691
1692         return (0);
1693 }
1694
1695 /*
1696  * union_pathconf(struct vnode *a_vp, int a_name, int *a_retval)
1697  */
1698 static int
1699 union_pathconf(struct vop_pathconf_args *ap)
1700 {
1701         int error;
1702         struct thread *td = curthread;          /* XXX */
1703         struct union_node *un = VTOUNION(ap->a_vp);
1704         struct vnode *vp;
1705
1706         vp = union_lock_other(un, td);
1707         KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
1708
1709         ap->a_head.a_ops = *vp->v_ops;
1710         ap->a_vp = vp;
1711         error = vop_pathconf_ap(ap);
1712         union_unlock_other(vp, td);
1713
1714         return (error);
1715 }
1716
1717 /*
1718  * union_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
1719  *               struct flock *a_fl, int a_flags)
1720  */
1721 static int
1722 union_advlock(struct vop_advlock_args *ap)
1723 {
1724         struct vnode *ovp = OTHERVP(ap->a_vp);
1725
1726         ap->a_head.a_ops = *ovp->v_ops;
1727         ap->a_vp = ovp;
1728         return (vop_advlock_ap(ap));
1729 }
1730
1731
1732 /*
1733  * XXX - vop_strategy must be hand coded because it has no
1734  * YYY - and it is not coherent with anything
1735  *
1736  * vnode in its arguments.
1737  * This goes away with a merged VM/buffer cache.
1738  *
1739  * union_strategy(struct vnode *a_vp, struct bio *a_bio)
1740  */
1741 static int
1742 union_strategy(struct vop_strategy_args *ap)
1743 {
1744         struct bio *bio = ap->a_bio;
1745         struct buf *bp = bio->bio_buf;
1746         struct vnode *othervp = OTHERVP(ap->a_vp);
1747
1748 #ifdef DIAGNOSTIC
1749         if (othervp == NULLVP)
1750                 panic("union_strategy: nil vp");
1751         if (bp->b_cmd != BUF_CMD_READ && (othervp == LOWERVP(ap->a_vp)))
1752                 panic("union_strategy: writing to lowervp");
1753 #endif
1754         return (vn_strategy(othervp, bio));
1755 }
1756
1757 /*
1758  * Global vfs data structures
1759  */
1760 struct vop_ops union_vnode_vops = {
1761         .vop_default =          vop_defaultop,
1762         .vop_access =           union_access,
1763         .vop_advlock =          union_advlock,
1764         .vop_bmap =             union_bmap,
1765         .vop_close =            union_close,
1766         .vop_old_create =       union_create,
1767         .vop_fsync =            union_fsync,
1768         .vop_getpages =         union_getpages,
1769         .vop_putpages =         union_putpages,
1770         .vop_getattr =          union_getattr,
1771         .vop_inactive =         union_inactive,
1772         .vop_ioctl =            union_ioctl,
1773         .vop_old_link =         union_link,
1774         .vop_old_lookup =       union_lookup,
1775         .vop_old_mkdir =        union_mkdir,
1776         .vop_old_mknod =        union_mknod,
1777         .vop_mmap =             union_mmap,
1778         .vop_open =             union_open,
1779         .vop_pathconf =         union_pathconf,
1780         .vop_poll =             union_poll,
1781         .vop_print =            union_print,
1782         .vop_read =             union_read,
1783         .vop_readdir =          union_readdir,
1784         .vop_readlink =         union_readlink,
1785         .vop_reclaim =          union_reclaim,
1786         .vop_old_remove =       union_remove,
1787         .vop_old_rename =       union_rename,
1788         .vop_old_rmdir =        union_rmdir,
1789         .vop_setattr =          union_setattr,
1790         .vop_strategy =         union_strategy,
1791         .vop_old_symlink =      union_symlink,
1792         .vop_old_whiteout =     union_whiteout,
1793         .vop_write =            union_write
1794 };
1795