/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 * $FreeBSD: src/sys/miscfs/nullfs/null_vnops.c,v 1.38.2.6 2002/07/31 00:32:28 semenu Exp $
 * $DragonFly: src/sys/vfs/nullfs/null_vnops.c,v 1.3 2003/06/25 03:55:59 dillon Exp $
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 *
 * $FreeBSD: src/sys/miscfs/nullfs/null_vnops.c,v 1.38.2.6 2002/07/31 00:32:28 semenu Exp $
 */

/*
 * Null Layer
 *
 * (See mount_null(8) for more information.)
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects:  it is implemented using
 * stackable layer techniques, and its "null-node"s stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer has two purposes.  First, it serves as a demonstration
 * of layering by providing a layer which does nothing.  (It actually
 * does everything the loopback file system does, which is slightly
 * more than nothing.)  Second, the null layer can serve as a prototype
 * layer.  Since it provides all necessary layer framework,
 * new file system layers can be created very easily by starting
 * with a null layer.
 *
 * The remainder of this comment examines the null layer as a basis
 * for constructing new layers.
 *
 *
 * INSTANTIATING NEW NULL LAYERS
 *
 * New null layers are created with mount_null(8).
 * Mount_null(8) takes two arguments, the pathname
 * of the lower vfs (target-pn) and the pathname where the null
 * layer will appear in the namespace (alias-pn).  After
 * the null layer is put into place, the contents
 * of the target-pn subtree will be aliased under alias-pn.
 *
 *
 * OPERATION OF A NULL LAYER
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  The majority of its activity centers
 * on the bypass routine, through which nearly all vnode operations
 * pass.
 *
 * The bypass routine accepts arbitrary vnode operations for
 * handling by the lower layer.  It begins by examining vnode
 * operation arguments and replacing any null-nodes by their
 * lower-layer equivalents.  It then invokes the operation
 * on the lower layer.  Finally, it replaces the null-nodes
 * in the arguments and, if a vnode is returned by the operation,
 * stacks a null-node on top of the returned vnode.
 *
 * Although bypass handles most operations, vop_getattr, vop_lock,
 * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
 * bypassed.  Vop_getattr must change the fsid being returned.
 * Vop_lock and vop_unlock must handle any locking for the
 * current vnode as well as pass the lock request down.
 * Vop_inactive and vop_reclaim are not bypassed so that
 * they can handle freeing null-layer specific data.  Vop_print
 * is not bypassed to avoid excessive debugging information.
 * Also, certain vnode operations change the locking state within
 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 * and symlink).  Ideally these operations should not change the
 * lock state, but should be changed to let the caller of the
 * function unlock them.  Otherwise all intermediate vnode layers
 * (such as union, umapfs, etc) must catch these functions to do
 * the necessary locking at their layer.
 *
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the null layer with a lower layer,
 * in effect stacking two VFSes.  Vnode stacks are instead
 * created on demand as files are accessed.
 *
 * The initial mount creates a single vnode stack for the
 * root of the new null layer.  All other vnode stacks
 * are created as a result of vnode operations on
 * this or other null vnode stacks.
 *
 * New vnode stacks come into existence as a result of
 * an operation which returns a vnode.
 * The bypass routine stacks a null-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with
 * "mount_null /usr/include /dev/layer/null".
 * Changing directory to /dev/layer/null will assign
 * the root null-node (which was created when the null layer was mounted).
 * Now consider opening "sys".  A vop_lookup would be
 * done on the root null-node.  This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
 * A sketch of both techniques follows this comment.
 *
 */
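
/*
 * Illustrative sketch only, kept under "#if 0" so it is never compiled:
 * the two techniques described above for invoking an operation on the
 * lower layer, using getattr as the example.  The function names
 * example_getattr_bypass() and example_getattr_direct() are hypothetical
 * and not part of this module; the real handler in this file is
 * null_getattr() below.  The VOP_GETATTR() argument order is assumed to
 * match the a_* fields of struct vop_getattr_args shown later in this file.
 */
#if 0
static int
example_getattr_bypass(struct vop_getattr_args *ap)
{
        int error;

        /*
         * Technique 1: let null_bypass() map the vnode arguments to the
         * lower layer, invoke the operation there, and restore the
         * arguments afterwards.
         */
        error = null_bypass((struct vop_generic_args *)ap);
        if (error)
                return (error);

        /* Fix up whatever the upper layer must present differently. */
        ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
        return (0);
}

static int
example_getattr_direct(struct vop_getattr_args *ap)
{
        struct vnode *lvp;
        int error;

        /*
         * Technique 2: map the vnode argument by hand and invoke the
         * specific operation directly on the lower vnode with the
         * VOP_*() interface.
         */
        lvp = NULLVPTOLOWERVP(ap->a_vp);
        error = VOP_GETATTR(lvp, ap->a_vap, ap->a_cred, ap->a_td);
        if (error)
                return (error);

        ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
        return (0);
}
#endif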

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <miscfs/nullfs/null.h>

static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
        &null_bug_bypass, 0, "");

static int null_access(struct vop_access_args *ap);
static int null_createvobject(struct vop_createvobject_args *ap);
static int null_destroyvobject(struct vop_destroyvobject_args *ap);
static int null_getattr(struct vop_getattr_args *ap);
static int null_getvobject(struct vop_getvobject_args *ap);
static int null_inactive(struct vop_inactive_args *ap);
static int null_islocked(struct vop_islocked_args *ap);
static int null_lock(struct vop_lock_args *ap);
static int null_lookup(struct vop_lookup_args *ap);
static int null_open(struct vop_open_args *ap);
static int null_print(struct vop_print_args *ap);
static int null_reclaim(struct vop_reclaim_args *ap);
static int null_rename(struct vop_rename_args *ap);
static int null_setattr(struct vop_setattr_args *ap);
static int null_unlock(struct vop_unlock_args *ap);

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(ap)
        struct vop_generic_args /* {
                struct vnodeop_desc *a_desc;
                <other random data follows, presumably>
        } */ *ap;
{
        register struct vnode **this_vp_p;
        int error;
        struct vnode *old_vps[VDESC_MAX_VPS];
        struct vnode **vps_p[VDESC_MAX_VPS];
        struct vnode ***vppp;
        struct vnodeop_desc *descp = ap->a_desc;
        int reles, i;

        if (null_bug_bypass)
                printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef DIAGNOSTIC
        /*
         * We require at least one vp.
         */
        if (descp->vdesc_vp_offsets == NULL ||
            descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
                panic ("null_bypass: no vp's in map");
#endif

        /*
         * Map the vnodes going in.
         * Later, we'll invoke the operation based on
         * the first mapped vnode's operation vector.
         */
        reles = descp->vdesc_flags;
        for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
                if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
                        break;   /* bail out at end of list */
                vps_p[i] = this_vp_p =
                        VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
                /*
                 * We're not guaranteed that any but the first vnode
                 * are of our type.  Check for and don't map any
                 * that aren't.  (We must always map first vp or vclean fails.)
                 */
                if (i && (*this_vp_p == NULLVP ||
                    (*this_vp_p)->v_op != null_vnodeop_p)) {
                        old_vps[i] = NULLVP;
                } else {
                        old_vps[i] = *this_vp_p;
                        *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
                        /*
                         * XXX - Several operations have the side effect
                         * of vrele'ing their vp's.  We must account for
                         * that.  (This should go away in the future.)
                         */
                        if (reles & VDESC_VP0_WILLRELE)
                                VREF(*this_vp_p);
                }

        }

        /*
         * Call the operation on the lower layer
         * with the modified argument structure.
         */
        if (vps_p[0] && *vps_p[0])
                error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
        else {
                printf("null_bypass: no map for %s\n", descp->vdesc_name);
                error = EINVAL;
        }

        /*
         * Maintain the illusion of call-by-value
         * by restoring vnodes in the argument structure
         * to their original value.
         */
        reles = descp->vdesc_flags;
        for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
                if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
                        break;   /* bail out at end of list */
                if (old_vps[i]) {
                        *(vps_p[i]) = old_vps[i];
#if 0
                        if (reles & VDESC_VP0_WILLUNLOCK)
                                VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curproc);
#endif
                        if (reles & VDESC_VP0_WILLRELE)
                                vrele(*(vps_p[i]));
                }
        }

        /*
         * Map the possible out-going vpp
         * (Assumes that the lower layer always returns
         * a VREF'ed vpp unless it gets an error.)
         */
        if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
            !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
            !error) {
                /*
                 * XXX - even though some ops have vpp returned vp's,
                 * several ops actually vrele this before returning.
                 * We must avoid these ops.
                 * (This should go away when these ops are regularized.)
                 */
                if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
                        goto out;
                vppp = VOPARG_OFFSETTO(struct vnode***,
                                 descp->vdesc_vpp_offset,ap);
                if (*vppp)
                        error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
        }

 out:
        return (error);
}

/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(ap)
        struct vop_lookup_args /* {
                struct vnode * a_dvp;
                struct vnode ** a_vpp;
                struct componentname * a_cnp;
        } */ *ap;
{
        struct componentname *cnp = ap->a_cnp;
        struct vnode *dvp = ap->a_dvp;
        struct thread *td = cnp->cn_td;
        int flags = cnp->cn_flags;
        struct vnode *vp, *ldvp, *lvp;
        int error;

        if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
            (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
                return (EROFS);
        /*
         * Although it is possible to call null_bypass(), we'll do
         * a direct call to reduce overhead
         */
        ldvp = NULLVPTOLOWERVP(dvp);
        vp = lvp = NULL;
        error = VOP_LOOKUP(ldvp, &lvp, cnp);
        if (error == EJUSTRETURN && (flags & ISLASTCN) &&
            (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
            (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
                error = EROFS;

        /*
         * Rely only on the PDIRUNLOCK flag which should be carefully
         * tracked by the underlying filesystem.
         */
        if (cnp->cn_flags & PDIRUNLOCK)
                VOP_UNLOCK(dvp, LK_THISLAYER, td);
        if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
                if (ldvp == lvp) {
                        *ap->a_vpp = dvp;
                        VREF(dvp);
                        vrele(lvp);
                } else {
                        error = null_node_create(dvp->v_mount, lvp, &vp);
                        if (error == 0)
                                *ap->a_vpp = vp;
                }
        }
        return (error);
}

/*
 * Setattr call.  Disallow write attempts if the layer is mounted read-only.
 */
int
null_setattr(ap)
        struct vop_setattr_args /* {
                struct vnodeop_desc *a_desc;
                struct vnode *a_vp;
                struct vattr *a_vap;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct vattr *vap = ap->a_vap;

        if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
            vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
            vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
            (vp->v_mount->mnt_flag & MNT_RDONLY))
                return (EROFS);
        if (vap->va_size != VNOVAL) {
                switch (vp->v_type) {
                case VDIR:
                        return (EISDIR);
                case VCHR:
                case VBLK:
                case VSOCK:
                case VFIFO:
                        if (vap->va_flags != VNOVAL)
                                return (EOPNOTSUPP);
                        return (0);
                case VREG:
                case VLNK:
                default:
                        /*
                         * Disallow write attempts if the filesystem is
                         * mounted read-only.
                         */
                        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                                return (EROFS);
                }
        }

        return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We handle getattr only to change the fsid.
 */
static int
null_getattr(ap)
        struct vop_getattr_args /* {
                struct vnode *a_vp;
                struct vattr *a_vap;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        int error;

        if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
                return (error);

        ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
        return (0);
}

/*
 * We handle access() to disallow write access if the layer is
 * mounted read-only.
 */
static int
null_access(ap)
        struct vop_access_args /* {
                struct vnode *a_vp;
                int a_mode;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        mode_t mode = ap->a_mode;

        /*
         * Disallow write attempts on read-only layers;
         * unless the file is a socket, fifo, or a block or
         * character device resident on the file system.
         */
        if (mode & VWRITE) {
                switch (vp->v_type) {
                case VDIR:
                case VLNK:
                case VREG:
                        if (vp->v_mount->mnt_flag & MNT_RDONLY)
                                return (EROFS);
                        break;
                default:
                        break;
                }
        }
        return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We must handle open to be able to catch MNT_NODEV and friends.
 */
static int
null_open(ap)
        struct vop_open_args /* {
                struct vnode *a_vp;
                int a_mode;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

        if ((vp->v_mount->mnt_flag & MNT_NODEV) &&
            (lvp->v_type == VBLK || lvp->v_type == VCHR))
                return ENXIO;

        return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We handle this to eliminate null FS to lower FS
 * file moving.  Don't know why we don't allow this,
 * possibly we should.
 */
static int
null_rename(ap)
        struct vop_rename_args /* {
                struct vnode *a_fdvp;
                struct vnode *a_fvp;
                struct componentname *a_fcnp;
                struct vnode *a_tdvp;
                struct vnode *a_tvp;
                struct componentname *a_tcnp;
        } */ *ap;
{
        struct vnode *tdvp = ap->a_tdvp;
        struct vnode *fvp = ap->a_fvp;
        struct vnode *fdvp = ap->a_fdvp;
        struct vnode *tvp = ap->a_tvp;

        /* Check for cross-device rename. */
        if ((fvp->v_mount != tdvp->v_mount) ||
            (tvp && (fvp->v_mount != tvp->v_mount))) {
                if (tdvp == tvp)
                        vrele(tdvp);
                else
                        vput(tdvp);
                if (tvp)
                        vput(tvp);
                vrele(fdvp);
                vrele(fvp);
                return (EXDEV);
        }

        return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We need to process our own vnode lock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_lock(ap)
        struct vop_lock_args /* {
                struct vnode *a_vp;
                int a_flags;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        int flags = ap->a_flags;
        struct null_node *np = VTONULL(vp);
        struct vnode *lvp;
        int error;

        if (flags & LK_THISLAYER) {
                if (vp->v_vnlock != NULL) {
                        /* lock is shared across layers */
                        if (flags & LK_INTERLOCK)
                                simple_unlock(&vp->v_interlock);
                        return 0;
                }
                error = lockmgr(&np->null_lock, flags & ~LK_THISLAYER,
                    &vp->v_interlock, ap->a_td);
                return (error);
        }

        if (vp->v_vnlock != NULL) {
                /*
                 * The lower level has exported a struct lock to us.  Use
                 * it so that all vnodes in the stack lock and unlock
                 * simultaneously.  Note: we don't DRAIN the lock as DRAIN
                 * decommissions the lock - just because our vnode is
                 * going away doesn't mean the struct lock below us is.
                 * LK_EXCLUSIVE is fine.
                 */
                if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
                        NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
                        return(lockmgr(vp->v_vnlock,
                                (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
                                &vp->v_interlock, ap->a_td));
                }
                return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, ap->a_td));
        }
        /*
         * To prevent race conditions involving doing a lookup
         * on "..", we have to lock the lower node, then lock our
         * node.  Most of the time it won't matter that we lock our
         * node (as any locking would need the lower one locked
         * first).  But we can LK_DRAIN the upper lock as a step
         * towards decommissioning it.
         */
        lvp = NULLVPTOLOWERVP(vp);
        if (lvp == NULL)
                return (lockmgr(&np->null_lock, flags, &vp->v_interlock, ap->a_td));
        if (flags & LK_INTERLOCK) {
                VI_UNLOCK(vp);
                flags &= ~LK_INTERLOCK;
        }
        if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
                error = VOP_LOCK(lvp,
                        (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, ap->a_td);
        } else
                error = VOP_LOCK(lvp, flags, ap->a_td);
        if (error)
                return (error);
        error = lockmgr(&np->null_lock, flags, &vp->v_interlock, ap->a_td);
        if (error)
                VOP_UNLOCK(lvp, 0, ap->a_td);
        return (error);
}

/*
 * We need to process our own vnode unlock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_unlock(ap)
        struct vop_unlock_args /* {
                struct vnode *a_vp;
                int a_flags;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        int flags = ap->a_flags;
        struct null_node *np = VTONULL(vp);
        struct vnode *lvp;

        if (vp->v_vnlock != NULL) {
                if (flags & LK_THISLAYER)
                        return 0;   /* the lock is shared across layers */
                flags &= ~LK_THISLAYER;
                return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
                        &vp->v_interlock, ap->a_td));
        }
        lvp = NULLVPTOLOWERVP(vp);
        if (lvp == NULL)
                return (lockmgr(&np->null_lock, flags | LK_RELEASE, &vp->v_interlock, ap->a_td));
        if ((flags & LK_THISLAYER) == 0) {
                if (flags & LK_INTERLOCK) {
                        VI_UNLOCK(vp);
                        flags &= ~LK_INTERLOCK;
                }
                VOP_UNLOCK(lvp, flags, ap->a_td);
        } else
                flags &= ~LK_THISLAYER;
        ap->a_flags = flags;
        return (lockmgr(&np->null_lock, flags | LK_RELEASE, &vp->v_interlock, ap->a_td));
}

static int
null_islocked(ap)
        struct vop_islocked_args /* {
                struct vnode *a_vp;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        if (vp->v_vnlock != NULL)
                return (lockstatus(vp->v_vnlock, ap->a_td));
        return (lockstatus(&VTONULL(vp)->null_lock, ap->a_td));
}


/*
 * There is no way to tell that someone issued a remove/rmdir operation
 * on the underlying filesystem.  For now we just have to release lowervp
 * as soon as possible.
 */
static int
null_inactive(ap)
        struct vop_inactive_args /* {
                struct vnode *a_vp;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct null_node *xp = VTONULL(vp);
        struct vnode *lowervp = xp->null_lowervp;

        lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, ap->a_td);
        LIST_REMOVE(xp, null_hash);
        lockmgr(&null_hashlock, LK_RELEASE, NULL, ap->a_td);

        xp->null_lowervp = NULLVP;
        if (vp->v_vnlock != NULL) {
                vp->v_vnlock = &xp->null_lock;   /* we no longer share the lock */
        } else
                VOP_UNLOCK(vp, LK_THISLAYER, ap->a_td);

        vput(lowervp);
        /*
         * Now it is safe to drop references to the lower vnode.
         * VOP_INACTIVE() will be called by vrele() if necessary.
         */
        vrele (lowervp);

        return (0);
}

/*
 * We could free the null node's memory in null_inactive(), but we do it
 * here instead.  (It would also be possible to guard vp->v_data so that
 * it points somewhere valid in the meantime.)
 */
static int
null_reclaim(ap)
        struct vop_reclaim_args /* {
                struct vnode *a_vp;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        void *vdata = vp->v_data;

        vp->v_data = NULL;
        FREE(vdata, M_NULLFSNODE);

        return (0);
}

static int
null_print(ap)
        struct vop_print_args /* {
                struct vnode *a_vp;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp));
        if (vp->v_vnlock != NULL) {
                printf("\tvnlock: ");
                lockmgr_printinfo(vp->v_vnlock);
        } else {
                printf("\tnull_lock: ");
                lockmgr_printinfo(&VTONULL(vp)->null_lock);
        }
        printf("\n");
        return (0);
}

/*
 * Let the underlying filesystem do the work.
 */
static int
null_createvobject(ap)
        struct vop_createvobject_args /* {
                struct vnode *a_vp;
                struct ucred *a_cred;
                struct thread *a_td;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
        int error;

        if (vp->v_type == VNON || lowervp == NULL)
                return 0;
        error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_td);
        if (error)
                return (error);
        vp->v_flag |= VOBJBUF;
        return (0);
}

/*
 * We have nothing to destroy and this operation shouldn't be bypassed.
 */
static int
null_destroyvobject(ap)
        struct vop_destroyvobject_args /* {
                struct vnode *vp;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        vp->v_flag &= ~VOBJBUF;
        return (0);
}

static int
null_getvobject(ap)
        struct vop_getvobject_args /* {
                struct vnode *vp;
                struct vm_object **objpp;
        } */ *ap;
{
        struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

        if (lvp == NULL)
                return EINVAL;
        return (VOP_GETVOBJECT(lvp, ap->a_objpp));
}

/*
 * Global vfs data structures
 */
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
        { &vop_default_desc,            (vop_t *) null_bypass },
        { &vop_access_desc,             (vop_t *) null_access },
        { &vop_createvobject_desc,      (vop_t *) null_createvobject },
        { &vop_destroyvobject_desc,     (vop_t *) null_destroyvobject },
        { &vop_getattr_desc,            (vop_t *) null_getattr },
        { &vop_getvobject_desc,         (vop_t *) null_getvobject },
        { &vop_inactive_desc,           (vop_t *) null_inactive },
        { &vop_islocked_desc,           (vop_t *) null_islocked },
        { &vop_lock_desc,               (vop_t *) null_lock },
        { &vop_lookup_desc,             (vop_t *) null_lookup },
        { &vop_open_desc,               (vop_t *) null_open },
        { &vop_print_desc,              (vop_t *) null_print },
        { &vop_reclaim_desc,            (vop_t *) null_reclaim },
        { &vop_rename_desc,             (vop_t *) null_rename },
        { &vop_setattr_desc,            (vop_t *) null_setattr },
        { &vop_unlock_desc,             (vop_t *) null_unlock },
        { NULL, NULL }
};
static struct vnodeopv_desc null_vnodeop_opv_desc =
        { &null_vnodeop_p, null_vnodeop_entries };

VNODEOP_SET(null_vnodeop_opv_desc);