kernel - Enhance getcacheblk() (improve saturated write performance (3)).
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vnops.c
CommitLineData
7a2de9a4
MD
1/*-
2 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7 * 2005 program.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
80ae59d7
MD
29 *
30 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
7a2de9a4
MD
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
7a2de9a4
MD
36
37#include <sys/kernel.h>
38#include <sys/kern_syscall.h>
39#include <sys/param.h>
40#include <sys/fcntl.h>
41#include <sys/lockf.h>
42#include <sys/priv.h>
43#include <sys/proc.h>
44#include <sys/resourcevar.h>
45#include <sys/sched.h>
7a2de9a4
MD
46#include <sys/stat.h>
47#include <sys/systm.h>
48#include <sys/unistd.h>
49#include <sys/vfsops.h>
50#include <sys/vnode.h>
51
52#include <sys/mplock2.h>
53
54#include <vm/vm.h>
55#include <vm/vm_object.h>
56#include <vm/vm_page.h>
57#include <vm/vm_pager.h>
b7545cb3 58#include <vm/swap_pager.h>
7a2de9a4
MD
59
60#include <vfs/fifofs/fifo.h>
61#include <vfs/tmpfs/tmpfs_vnops.h>
62#include <vfs/tmpfs/tmpfs.h>
63
64MALLOC_DECLARE(M_TMPFS);
65
80ae59d7
MD
66static __inline
67void
68tmpfs_knote(struct vnode *vp, int flags)
69{
70 if (flags)
71 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
72}
73
74
7a2de9a4
MD
75/* --------------------------------------------------------------------- */
76
77static int
78tmpfs_nresolve(struct vop_nresolve_args *v)
79{
80 struct vnode *dvp = v->a_dvp;
81 struct vnode *vp = NULL;
82 struct namecache *ncp = v->a_nch->ncp;
d89ce96a 83 struct tmpfs_node *tnode;
7a2de9a4
MD
84
85 int error;
86 struct tmpfs_dirent *de;
87 struct tmpfs_node *dnode;
88
89 dnode = VP_TO_TMPFS_DIR(dvp);
90
7a2de9a4
MD
91 de = tmpfs_dir_lookup(dnode, NULL, ncp);
92 if (de == NULL) {
d89ce96a 93 error = ENOENT;
7a2de9a4 94 } else {
d89ce96a
MD
95 /*
96 * Allocate a vnode for the node we found.
97 */
7a2de9a4 98 tnode = de->td_node;
7a2de9a4 99 error = tmpfs_alloc_vp(dvp->v_mount, tnode,
d89ce96a
MD
100 LK_EXCLUSIVE | LK_RETRY, &vp);
101 if (error)
7a2de9a4 102 goto out;
d89ce96a 103 KKASSERT(vp);
7a2de9a4
MD
104 }
105
7a2de9a4 106out:
d89ce96a
MD
107 /*
108 * Store the result of this lookup in the cache. Avoid this if the
7a2de9a4 109 * request was for creation, as it does not improve timings on
d89ce96a
MD
110 * emprical tests.
111 */
7a2de9a4
MD
112 if (vp) {
113 vn_unlock(vp);
114 cache_setvp(v->a_nch, vp);
115 vrele(vp);
d89ce96a 116 } else if (error == ENOENT) {
7a2de9a4
MD
117 cache_setvp(v->a_nch, NULL);
118 }
119 return error;
120}
121
122static int
123tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *v)
124{
125 struct vnode *dvp = v->a_dvp;
126 struct vnode **vpp = v->a_vpp;
127 struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
128 struct ucred *cred = v->a_cred;
129 int error;
130
131 *vpp = NULL;
132 /* Check accessibility of requested node as a first step. */
133 error = VOP_ACCESS(dvp, VEXEC, cred);
134 if (error != 0)
135 return error;
136
137 if (dnode->tn_dir.tn_parent != NULL) {
138 /* Allocate a new vnode on the matching entry. */
139 error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
140 LK_EXCLUSIVE | LK_RETRY, vpp);
141
142 if (*vpp)
143 vn_unlock(*vpp);
144 }
145
146 return (*vpp == NULL) ? ENOENT : 0;
147}
148
149/* --------------------------------------------------------------------- */
150
151static int
152tmpfs_ncreate(struct vop_ncreate_args *v)
153{
154 struct vnode *dvp = v->a_dvp;
155 struct vnode **vpp = v->a_vpp;
156 struct namecache *ncp = v->a_nch->ncp;
157 struct vattr *vap = v->a_vap;
158 struct ucred *cred = v->a_cred;
159 int error;
160
161 KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
162
7a2de9a4
MD
163 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
164 if (error == 0) {
165 cache_setunresolved(v->a_nch);
166 cache_setvp(v->a_nch, *vpp);
80ae59d7 167 tmpfs_knote(dvp, NOTE_WRITE);
7a2de9a4 168 }
7a2de9a4
MD
169
170 return error;
171}
172/* --------------------------------------------------------------------- */
173
174static int
175tmpfs_nmknod(struct vop_nmknod_args *v)
176{
177 struct vnode *dvp = v->a_dvp;
178 struct vnode **vpp = v->a_vpp;
179 struct namecache *ncp = v->a_nch->ncp;
180 struct vattr *vap = v->a_vap;
181 struct ucred *cred = v->a_cred;
182 int error;
183
184 if (vap->va_type != VBLK && vap->va_type != VCHR &&
185 vap->va_type != VFIFO)
186 return EINVAL;
187
7a2de9a4
MD
188 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
189 if (error == 0) {
190 cache_setunresolved(v->a_nch);
191 cache_setvp(v->a_nch, *vpp);
80ae59d7 192 tmpfs_knote(dvp, NOTE_WRITE);
7a2de9a4 193 }
7a2de9a4
MD
194
195 return error;
196}
197
198/* --------------------------------------------------------------------- */
199
200static int
201tmpfs_open(struct vop_open_args *v)
202{
203 struct vnode *vp = v->a_vp;
204 int mode = v->a_mode;
205
206 int error;
207 struct tmpfs_node *node;
208
7a2de9a4
MD
209 node = VP_TO_TMPFS_NODE(vp);
210
211 /* The file is still active but all its names have been removed
212 * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as
213 * it is about to die. */
214 if (node->tn_links < 1)
215 return (ENOENT);
216
217 /* If the file is marked append-only, deny write requests. */
630e3a33
MD
218 if ((node->tn_flags & APPEND) &&
219 (mode & (FWRITE | O_APPEND)) == FWRITE) {
7a2de9a4 220 error = EPERM;
630e3a33 221 } else {
7a2de9a4
MD
222 return (vop_stdopen(v));
223 }
7a2de9a4
MD
224 return error;
225}
226
227/* --------------------------------------------------------------------- */
228
229static int
230tmpfs_close(struct vop_close_args *v)
231{
232 struct vnode *vp = v->a_vp;
233 struct tmpfs_node *node;
234
235 node = VP_TO_TMPFS_NODE(vp);
236
237 if (node->tn_links > 0) {
238 /* Update node times. No need to do it if the node has
239 * been deleted, because it will vanish after we return. */
240 tmpfs_update(vp);
241 }
242
243 return vop_stdclose(v);
244}
245
246/* --------------------------------------------------------------------- */
247
248int
249tmpfs_access(struct vop_access_args *v)
250{
251 struct vnode *vp = v->a_vp;
252 int error;
253 struct tmpfs_node *node;
254
7a2de9a4
MD
255 node = VP_TO_TMPFS_NODE(vp);
256
257 switch (vp->v_type) {
258 case VDIR:
259 /* FALLTHROUGH */
260 case VLNK:
261 /* FALLTHROUGH */
262 case VREG:
5a9e9ac7 263 if ((v->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
7a2de9a4
MD
264 error = EROFS;
265 goto out;
266 }
267 break;
268
269 case VBLK:
270 /* FALLTHROUGH */
271 case VCHR:
272 /* FALLTHROUGH */
273 case VSOCK:
274 /* FALLTHROUGH */
275 case VFIFO:
276 break;
277
278 default:
279 error = EINVAL;
280 goto out;
281 }
282
5a9e9ac7 283 if ((v->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
7a2de9a4
MD
284 error = EPERM;
285 goto out;
286 }
287
288 error = vop_helper_access(v, node->tn_uid, node->tn_gid, node->tn_mode, 0);
289
290out:
291
292 return error;
293}
294
295/* --------------------------------------------------------------------- */
296
297int
298tmpfs_getattr(struct vop_getattr_args *v)
299{
300 struct vnode *vp = v->a_vp;
301 struct vattr *vap = v->a_vap;
7a2de9a4 302 struct tmpfs_node *node;
7a2de9a4
MD
303
304 node = VP_TO_TMPFS_NODE(vp);
305
306 tmpfs_update(vp);
307
308 vap->va_type = vp->v_type;
309 vap->va_mode = node->tn_mode;
310 vap->va_nlink = node->tn_links;
311 vap->va_uid = node->tn_uid;
312 vap->va_gid = node->tn_gid;
313 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
314 vap->va_fileid = node->tn_id;
315 vap->va_size = node->tn_size;
316 vap->va_blocksize = PAGE_SIZE;
317 vap->va_atime.tv_sec = node->tn_atime;
318 vap->va_atime.tv_nsec = node->tn_atimensec;
319 vap->va_mtime.tv_sec = node->tn_mtime;
320 vap->va_mtime.tv_nsec = node->tn_mtimensec;
321 vap->va_ctime.tv_sec = node->tn_ctime;
322 vap->va_ctime.tv_nsec = node->tn_ctimensec;
323 vap->va_gen = node->tn_gen;
324 vap->va_flags = node->tn_flags;
325 if (vp->v_type == VBLK || vp->v_type == VCHR)
326 {
327 vap->va_rmajor = umajor(node->tn_rdev);
328 vap->va_rminor = uminor(node->tn_rdev);
329 }
330 vap->va_bytes = round_page(node->tn_size);
331 vap->va_filerev = 0;
332
7a2de9a4
MD
333 return 0;
334}
335
336/* --------------------------------------------------------------------- */
337
338int
339tmpfs_setattr(struct vop_setattr_args *v)
340{
341 struct vnode *vp = v->a_vp;
342 struct vattr *vap = v->a_vap;
343 struct ucred *cred = v->a_cred;
80ae59d7 344 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
7a2de9a4 345 int error = 0;
80ae59d7 346 int kflags = 0;
7a2de9a4 347
80ae59d7 348 if (error == 0 && (vap->va_flags != VNOVAL)) {
7a2de9a4 349 error = tmpfs_chflags(vp, vap->va_flags, cred);
80ae59d7
MD
350 kflags |= NOTE_ATTRIB;
351 }
7a2de9a4 352
80ae59d7
MD
353 if (error == 0 && (vap->va_size != VNOVAL)) {
354 if (vap->va_size > node->tn_size)
355 kflags |= NOTE_WRITE | NOTE_EXTEND;
356 else
357 kflags |= NOTE_WRITE;
7a2de9a4 358 error = tmpfs_chsize(vp, vap->va_size, cred);
80ae59d7 359 }
7a2de9a4 360
d89ce96a
MD
361 if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
362 vap->va_gid != (gid_t)VNOVAL)) {
7a2de9a4 363 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
80ae59d7 364 kflags |= NOTE_ATTRIB;
d89ce96a 365 }
7a2de9a4 366
80ae59d7 367 if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
7a2de9a4 368 error = tmpfs_chmod(vp, vap->va_mode, cred);
80ae59d7
MD
369 kflags |= NOTE_ATTRIB;
370 }
7a2de9a4
MD
371
372 if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
373 vap->va_atime.tv_nsec != VNOVAL) ||
374 (vap->va_mtime.tv_sec != VNOVAL &&
d89ce96a 375 vap->va_mtime.tv_nsec != VNOVAL) )) {
7a2de9a4 376 error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
d89ce96a 377 vap->va_vaflags, cred);
80ae59d7 378 kflags |= NOTE_ATTRIB;
d89ce96a 379 }
7a2de9a4
MD
380
381 /* Update the node times. We give preference to the error codes
382 * generated by this function rather than the ones that may arise
383 * from tmpfs_update. */
384 tmpfs_update(vp);
80ae59d7 385 tmpfs_knote(vp, kflags);
7a2de9a4 386
7a2de9a4
MD
387 return error;
388}
389
390/* --------------------------------------------------------------------- */
391
9fc94b5f 392/*
630e3a33
MD
393 * fsync is usually a NOP, but we must take action when unmounting or
394 * when recycling.
9fc94b5f 395 */
7a2de9a4
MD
396static int
397tmpfs_fsync(struct vop_fsync_args *v)
398{
9fc94b5f 399 struct tmpfs_mount *tmp;
630e3a33 400 struct tmpfs_node *node;
7a2de9a4
MD
401 struct vnode *vp = v->a_vp;
402
9fc94b5f 403 tmp = VFS_TO_TMPFS(vp->v_mount);
630e3a33
MD
404 node = VP_TO_TMPFS_NODE(vp);
405
406 tmpfs_update(vp);
407 if (vp->v_type == VREG) {
d4623db3 408 if (vp->v_flag & VRECLAIMED) {
630e3a33
MD
409 if (node->tn_links == 0)
410 tmpfs_truncate(vp, 0);
411 else
412 vfsync(v->a_vp, v->a_waitfor, 1, NULL, NULL);
413 }
9fc94b5f 414 }
7a2de9a4
MD
415 return 0;
416}
417
418/* --------------------------------------------------------------------- */
419
420static int
421tmpfs_read (struct vop_read_args *ap)
422{
423 struct buf *bp;
424 struct vnode *vp = ap->a_vp;
425 struct uio *uio = ap->a_uio;
426 struct tmpfs_node *node;
7a2de9a4 427 off_t base_offset;
9fc94b5f 428 size_t offset;
7a2de9a4 429 size_t len;
9fc94b5f 430 int error;
7a2de9a4
MD
431
432 error = 0;
433 if (uio->uio_resid == 0) {
434 return error;
435 }
436
437 node = VP_TO_TMPFS_NODE(vp);
438
439 if (uio->uio_offset < 0)
440 return (EINVAL);
441 if (vp->v_type != VREG)
442 return (EINVAL);
443
7a2de9a4
MD
444 while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
445 /*
446 * Use buffer cache I/O (via tmpfs_strategy)
447 */
9fc94b5f 448 offset = (size_t)uio->uio_offset & BMASK;
7a2de9a4 449 base_offset = (off_t)uio->uio_offset - offset;
72d6a027 450 bp = getcacheblk(vp, base_offset, BSIZE);
7a2de9a4
MD
451 if (bp == NULL)
452 {
7a2de9a4
MD
453 error = bread(vp, base_offset, BSIZE, &bp);
454 if (error) {
455 brelse(bp);
456 kprintf("tmpfs_read bread error %d\n", error);
457 break;
458 }
459 }
460
7a2de9a4
MD
461 /*
462 * Figure out how many bytes we can actually copy this loop.
463 */
464 len = BSIZE - offset;
465 if (len > uio->uio_resid)
466 len = uio->uio_resid;
467 if (len > node->tn_size - uio->uio_offset)
468 len = (size_t)(node->tn_size - uio->uio_offset);
469
470 error = uiomove((char *)bp->b_data + offset, len, uio);
471 bqrelse(bp);
472 if (error) {
473 kprintf("tmpfs_read uiomove error %d\n", error);
474 break;
475 }
476 }
477
7a2de9a4
MD
478 TMPFS_NODE_LOCK(node);
479 node->tn_status |= TMPFS_NODE_ACCESSED;
480 TMPFS_NODE_UNLOCK(node);
481
7a2de9a4
MD
482 return(error);
483}
484
485static int
486tmpfs_write (struct vop_write_args *ap)
487{
488 struct buf *bp;
489 struct vnode *vp = ap->a_vp;
490 struct uio *uio = ap->a_uio;
491 struct thread *td = uio->uio_td;
492 struct tmpfs_node *node;
493 boolean_t extended;
494 off_t oldsize;
495 int error;
7a2de9a4 496 off_t base_offset;
9fc94b5f 497 size_t offset;
7a2de9a4
MD
498 size_t len;
499 struct rlimit limit;
7a2de9a4 500 int trivial = 0;
80ae59d7 501 int kflags = 0;
7a2de9a4
MD
502
503 error = 0;
504 if (uio->uio_resid == 0) {
505 return error;
506 }
507
508 node = VP_TO_TMPFS_NODE(vp);
509
510 if (vp->v_type != VREG)
511 return (EINVAL);
512
513 oldsize = node->tn_size;
514 if (ap->a_ioflag & IO_APPEND)
515 uio->uio_offset = node->tn_size;
516
517 /*
518 * Check for illegal write offsets.
519 */
520 if (uio->uio_offset + uio->uio_resid >
521 VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
522 return (EFBIG);
523
524 if (vp->v_type == VREG && td != NULL) {
525 error = kern_getrlimit(RLIMIT_FSIZE, &limit);
526 if (error != 0)
527 return error;
528 if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
529 ksignal(td->td_proc, SIGXFSZ);
530 return (EFBIG);
531 }
532 }
533
534
535 /*
536 * Extend the file's size if necessary
537 */
9fc94b5f 538 extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
7a2de9a4 539
b5d16701
MD
540 get_mplock();
541
7a2de9a4
MD
542 while (uio->uio_resid > 0) {
543 /*
544 * Use buffer cache I/O (via tmpfs_strategy)
545 */
9fc94b5f 546 offset = (size_t)uio->uio_offset & BMASK;
7a2de9a4
MD
547 base_offset = (off_t)uio->uio_offset - offset;
548 len = BSIZE - offset;
549 if (len > uio->uio_resid)
550 len = uio->uio_resid;
551
552 if ((uio->uio_offset + len) > node->tn_size) {
9fc94b5f 553 trivial = (uio->uio_offset <= node->tn_size);
7a2de9a4
MD
554 error = tmpfs_reg_resize(vp, uio->uio_offset + len, trivial);
555 if (error)
556 break;
557 }
558
9fc94b5f
MD
559 /*
560 * Read to fill in any gaps. Theoretically we could
561 * optimize this if the write covers the entire buffer
562 * and is not a UIO_NOCOPY write, however this can lead
563 * to a security violation exposing random kernel memory
564 * (whatever junk was in the backing VM pages before).
565 *
566 * So just use bread() to do the right thing.
567 */
568 error = bread(vp, base_offset, BSIZE, &bp);
7a2de9a4
MD
569 error = uiomove((char *)bp->b_data + offset, len, uio);
570 if (error) {
571 kprintf("tmpfs_write uiomove error %d\n", error);
572 brelse(bp);
573 break;
574 }
575
80ae59d7 576 if (uio->uio_offset > node->tn_size) {
7a2de9a4 577 node->tn_size = uio->uio_offset;
80ae59d7
MD
578 kflags |= NOTE_EXTEND;
579 }
580 kflags |= NOTE_WRITE;
7a2de9a4
MD
581
582 /*
9fc94b5f 583 * The data has been loaded into the buffer, write it out.
7a2de9a4 584 *
9fc94b5f
MD
585 * We want tmpfs to be able to use all available ram, not
586 * just the buffer cache, so if not explicitly paging we
587 * use buwrite() to leave the buffer clean but mark all the
588 * VM pages valid+dirty.
7a2de9a4 589 *
d89ce96a
MD
590 * When the kernel is paging, either via normal pageout
591 * operation or when cleaning the object during a recycle,
592 * the underlying VM pages are going to get thrown away
593 * so we MUST write them to swap.
594 *
595 * XXX unfortunately this catches msync() system calls too
596 * for the moment.
7a2de9a4 597 */
b7545cb3
AH
598 if (vm_swap_size == 0) {
599 /*
600 * if swap isn't configured yet, force a buwrite() to
601 * avoid problems further down the line, due to flushing
602 * to swap.
603 */
9fc94b5f 604 buwrite(bp);
b7545cb3
AH
605 } else {
606 if (ap->a_ioflag & IO_SYNC) {
607 bwrite(bp);
608 } else if ((ap->a_ioflag & IO_ASYNC) ||
609 (uio->uio_segflg == UIO_NOCOPY)) {
610 bawrite(bp);
611 } else {
612 buwrite(bp);
613 }
d89ce96a 614 }
9fc94b5f 615
7a2de9a4 616 if (bp->b_error) {
2cd8c774 617 kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
7a2de9a4
MD
618 break;
619 }
620 }
7a2de9a4 621
b5d16701 622 rel_mplock();
7a2de9a4
MD
623
624 if (error) {
80ae59d7 625 if (extended) {
7a2de9a4 626 (void)tmpfs_reg_resize(vp, oldsize, trivial);
80ae59d7
MD
627 kflags &= ~NOTE_EXTEND;
628 }
629 goto done;
7a2de9a4
MD
630 }
631
632 TMPFS_NODE_LOCK(node);
633 node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
634 (extended? TMPFS_NODE_CHANGED : 0);
635
636 if (node->tn_mode & (S_ISUID | S_ISGID)) {
637 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
638 node->tn_mode &= ~(S_ISUID | S_ISGID);
639 }
640 TMPFS_NODE_UNLOCK(node);
80ae59d7 641done:
7a2de9a4 642
80ae59d7 643 tmpfs_knote(vp, kflags);
7a2de9a4
MD
644 return(error);
645}
646
647static int
648tmpfs_advlock (struct vop_advlock_args *ap)
649{
650 struct tmpfs_node *node;
651 struct vnode *vp = ap->a_vp;
652
653 node = VP_TO_TMPFS_NODE(vp);
654
655 return (lf_advlock(ap, &node->tn_advlock, node->tn_size));
656}
657
7a2de9a4
MD
658static int
659tmpfs_strategy(struct vop_strategy_args *ap)
660{
661 struct bio *bio = ap->a_bio;
9fc94b5f 662 struct buf *bp = bio->bio_buf;
7a2de9a4
MD
663 struct vnode *vp = ap->a_vp;
664 struct tmpfs_node *node;
665 vm_object_t uobj;
666
9fc94b5f
MD
667 if (vp->v_type != VREG) {
668 bp->b_resid = bp->b_bcount;
669 bp->b_flags |= B_ERROR | B_INVAL;
670 bp->b_error = EINVAL;
671 biodone(bio);
672 return(0);
673 }
7a2de9a4
MD
674
675 node = VP_TO_TMPFS_NODE(vp);
676
677 uobj = node->tn_reg.tn_aobj;
9fc94b5f 678
7a2de9a4 679 /*
9fc94b5f
MD
680 * Call swap_pager_strategy to read or write between the VM
681 * object and the buffer cache.
7a2de9a4
MD
682 */
683 swap_pager_strategy(uobj, bio);
684
685 return 0;
686}
687
688static int
689tmpfs_bmap(struct vop_bmap_args *ap)
690{
691 if (ap->a_doffsetp != NULL)
692 *ap->a_doffsetp = ap->a_loffset;
693 if (ap->a_runp != NULL)
694 *ap->a_runp = 0;
695 if (ap->a_runb != NULL)
696 *ap->a_runb = 0;
697
698 return 0;
699}
9fc94b5f 700
7a2de9a4
MD
701/* --------------------------------------------------------------------- */
702
703static int
704tmpfs_nremove(struct vop_nremove_args *v)
705{
706 struct vnode *dvp = v->a_dvp;
707 struct namecache *ncp = v->a_nch->ncp;
9fc94b5f 708 struct vnode *vp;
7a2de9a4
MD
709 int error;
710 struct tmpfs_dirent *de;
711 struct tmpfs_mount *tmp;
712 struct tmpfs_node *dnode;
713 struct tmpfs_node *node;
714
9fc94b5f
MD
715 /*
716 * We have to acquire the vp from v->a_nch because
717 * we will likely unresolve the namecache entry, and
718 * a vrele is needed to trigger the tmpfs_inactive/tmpfs_reclaim
719 * sequence to recover space from the file.
720 */
721 error = cache_vref(v->a_nch, v->a_cred, &vp);
722 KKASSERT(error == 0);
7a2de9a4
MD
723
724 if (vp->v_type == VDIR) {
725 error = EISDIR;
726 goto out;
727 }
728
729 dnode = VP_TO_TMPFS_DIR(dvp);
730 node = VP_TO_TMPFS_NODE(vp);
731 tmp = VFS_TO_TMPFS(vp->v_mount);
732 de = tmpfs_dir_lookup(dnode, node, ncp);
733 if (de == NULL) {
734 error = ENOENT;
735 goto out;
736 }
737
738 /* Files marked as immutable or append-only cannot be deleted. */
739 if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
740 (dnode->tn_flags & APPEND)) {
741 error = EPERM;
742 goto out;
743 }
744
745 /* Remove the entry from the directory; as it is a file, we do not
746 * have to change the number of hard links of the directory. */
22d3b394 747 tmpfs_dir_detach(dnode, de);
7a2de9a4
MD
748
749 /* Free the directory entry we just deleted. Note that the node
750 * referred by it will not be removed until the vnode is really
751 * reclaimed. */
0786baf1 752 tmpfs_free_dirent(tmp, de);
7a2de9a4
MD
753
754 if (node->tn_links > 0) {
755 TMPFS_NODE_LOCK(node);
756 node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
757 TMPFS_NODE_MODIFIED;
758 TMPFS_NODE_UNLOCK(node);
759 }
760
761 cache_setunresolved(v->a_nch);
762 cache_setvp(v->a_nch, NULL);
80ae59d7 763 tmpfs_knote(vp, NOTE_DELETE);
9fc94b5f 764 /*cache_inval_vp(vp, CINV_DESTROY);*/
80ae59d7 765 tmpfs_knote(dvp, NOTE_WRITE);
7a2de9a4
MD
766 error = 0;
767
7a2de9a4 768out:
9fc94b5f 769 vrele(vp);
7a2de9a4
MD
770
771 return error;
772}
773
774/* --------------------------------------------------------------------- */
775
776static int
777tmpfs_nlink(struct vop_nlink_args *v)
778{
779 struct vnode *dvp = v->a_dvp;
780 struct vnode *vp = v->a_vp;
781 struct namecache *ncp = v->a_nch->ncp;
7a2de9a4
MD
782 struct tmpfs_dirent *de;
783 struct tmpfs_node *node;
22d3b394
MD
784 struct tmpfs_node *dnode;
785 int error;
7a2de9a4 786
7a2de9a4
MD
787 KKASSERT(dvp != vp); /* XXX When can this be false? */
788
789 node = VP_TO_TMPFS_NODE(vp);
22d3b394 790 dnode = VP_TO_TMPFS_NODE(dvp);
7a2de9a4
MD
791
792 /* XXX: Why aren't the following two tests done by the caller? */
793
794 /* Hard links of directories are forbidden. */
795 if (vp->v_type == VDIR) {
796 error = EPERM;
797 goto out;
798 }
799
800 /* Cannot create cross-device links. */
801 if (dvp->v_mount != vp->v_mount) {
802 error = EXDEV;
803 goto out;
804 }
805
806 /* Ensure that we do not overflow the maximum number of links imposed
807 * by the system. */
808 KKASSERT(node->tn_links <= LINK_MAX);
809 if (node->tn_links == LINK_MAX) {
810 error = EMLINK;
811 goto out;
812 }
813
814 /* We cannot create links of files marked immutable or append-only. */
815 if (node->tn_flags & (IMMUTABLE | APPEND)) {
816 error = EPERM;
817 goto out;
818 }
819
820 /* Allocate a new directory entry to represent the node. */
821 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
822 ncp->nc_name, ncp->nc_nlen, &de);
823 if (error != 0)
824 goto out;
825
826 /* Insert the new directory entry into the appropriate directory. */
22d3b394 827 tmpfs_dir_attach(dnode, de);
7a2de9a4
MD
828
829 /* vp link count has changed, so update node times. */
830
831 TMPFS_NODE_LOCK(node);
832 node->tn_status |= TMPFS_NODE_CHANGED;
833 TMPFS_NODE_UNLOCK(node);
834 tmpfs_update(vp);
835
80ae59d7 836 tmpfs_knote(vp, NOTE_LINK);
7a2de9a4
MD
837 cache_setunresolved(v->a_nch);
838 cache_setvp(v->a_nch, vp);
80ae59d7 839 tmpfs_knote(dvp, NOTE_WRITE);
7a2de9a4
MD
840 error = 0;
841
842out:
7a2de9a4
MD
843 return error;
844}
845
846/* --------------------------------------------------------------------- */
847
848static int
849tmpfs_nrename(struct vop_nrename_args *v)
850{
851 struct vnode *fdvp = v->a_fdvp;
852 struct namecache *fncp = v->a_fnch->ncp;
853 struct vnode *fvp = fncp->nc_vp;
854 struct vnode *tdvp = v->a_tdvp;
855 struct namecache *tncp = v->a_tnch->ncp;
856 struct vnode *tvp = tncp->nc_vp;
7a2de9a4
MD
857 struct tmpfs_dirent *de;
858 struct tmpfs_mount *tmp;
859 struct tmpfs_node *fdnode;
860 struct tmpfs_node *fnode;
861 struct tmpfs_node *tnode;
862 struct tmpfs_node *tdnode;
22d3b394 863 char *newname;
dca262fb 864 char *oldname;
22d3b394 865 int error;
7a2de9a4 866
7a2de9a4
MD
867 tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
868
869 /* Disallow cross-device renames.
870 * XXX Why isn't this done by the caller? */
871 if (fvp->v_mount != tdvp->v_mount ||
872 (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
873 error = EXDEV;
874 goto out;
875 }
876
877 tmp = VFS_TO_TMPFS(tdvp->v_mount);
878 tdnode = VP_TO_TMPFS_DIR(tdvp);
879
880 /* If source and target are the same file, there is nothing to do. */
881 if (fvp == tvp) {
882 error = 0;
883 goto out;
884 }
885
7a2de9a4
MD
886 fdnode = VP_TO_TMPFS_DIR(fdvp);
887 fnode = VP_TO_TMPFS_NODE(fvp);
888 de = tmpfs_dir_lookup(fdnode, fnode, fncp);
889
890 /* Avoid manipulating '.' and '..' entries. */
891 if (de == NULL) {
892 error = ENOENT;
893 goto out_locked;
894 }
895 KKASSERT(de->td_node == fnode);
896
dca262fb
MD
897 /*
898 * If replacing an entry in the target directory and that entry
899 * is a directory, it must be empty.
900 *
7a2de9a4 901 * Kern_rename gurantees the destination to be a directory
dca262fb
MD
902 * if the source is one (it does?).
903 */
7a2de9a4
MD
904 if (tvp != NULL) {
905 KKASSERT(tnode != NULL);
906
907 if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
908 (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
909 error = EPERM;
910 goto out_locked;
911 }
912
913 if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
914 if (tnode->tn_size > 0) {
915 error = ENOTEMPTY;
916 goto out_locked;
917 }
918 } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
919 error = ENOTDIR;
920 goto out_locked;
921 } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
922 error = EISDIR;
923 goto out_locked;
924 } else {
925 KKASSERT(fnode->tn_type != VDIR &&
926 tnode->tn_type != VDIR);
927 }
928 }
929
dca262fb
MD
930 if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
931 (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
7a2de9a4
MD
932 error = EPERM;
933 goto out_locked;
934 }
935
dca262fb
MD
936 /*
937 * Ensure that we have enough memory to hold the new name, if it
938 * has to be changed.
939 */
7a2de9a4
MD
940 if (fncp->nc_nlen != tncp->nc_nlen ||
941 bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
d00cd01c 942 newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
42f6f6b1
VS
943 M_WAITOK | M_NULLOK);
944 if (newname == NULL) {
945 error = ENOSPC;
946 goto out_locked;
947 }
dca262fb
MD
948 bcopy(tncp->nc_name, newname, tncp->nc_nlen);
949 newname[tncp->nc_nlen] = '\0';
950 } else {
7a2de9a4 951 newname = NULL;
dca262fb 952 }
7a2de9a4 953
dca262fb
MD
954 /*
955 * Unlink entry from source directory. Note that the kernel has
956 * already checked for illegal recursion cases (renaming a directory
957 * into a subdirectory of itself).
958 */
959 if (fdnode != tdnode)
960 tmpfs_dir_detach(fdnode, de);
961
962 /*
963 * Handle any name change. Swap with newname, we will
964 * deallocate it at the end.
965 */
966 if (newname != NULL) {
967#if 0
968 TMPFS_NODE_LOCK(fnode);
969 fnode->tn_status |= TMPFS_NODE_CHANGED;
970 TMPFS_NODE_UNLOCK(fnode);
971#endif
972 oldname = de->td_name;
973 de->td_name = newname;
974 de->td_namelen = (uint16_t)tncp->nc_nlen;
975 newname = oldname;
976 }
977
978 /*
979 * Link entry to target directory. If the entry
980 * represents a directory move the parent linkage
981 * as well.
982 */
7a2de9a4 983 if (fdnode != tdnode) {
7a2de9a4 984 if (de->td_node->tn_type == VDIR) {
7a2de9a4 985 TMPFS_VALIDATE_DIR(fnode);
7a2de9a4 986
7a2de9a4 987 TMPFS_NODE_LOCK(tdnode);
7a2de9a4 988 tdnode->tn_links++;
dca262fb
MD
989 tdnode->tn_status |= TMPFS_NODE_MODIFIED;
990 TMPFS_NODE_UNLOCK(tdnode);
7a2de9a4 991
dca262fb
MD
992 TMPFS_NODE_LOCK(fnode);
993 fnode->tn_dir.tn_parent = tdnode;
994 fnode->tn_status |= TMPFS_NODE_CHANGED;
995 TMPFS_NODE_UNLOCK(fnode);
996
997 TMPFS_NODE_LOCK(fdnode);
998 fdnode->tn_links--;
999 fdnode->tn_status |= TMPFS_NODE_MODIFIED;
7a2de9a4 1000 TMPFS_NODE_UNLOCK(fdnode);
7a2de9a4 1001 }
22d3b394 1002 tmpfs_dir_attach(tdnode, de);
dca262fb 1003 } else {
7a2de9a4 1004 TMPFS_NODE_LOCK(tdnode);
7a2de9a4 1005 tdnode->tn_status |= TMPFS_NODE_MODIFIED;
7a2de9a4
MD
1006 TMPFS_NODE_UNLOCK(tdnode);
1007 }
1008
dca262fb
MD
1009 /*
1010 * If we are overwriting an entry, we have to remove the old one
1011 * from the target directory.
1012 */
7a2de9a4
MD
1013 if (tvp != NULL) {
1014 /* Remove the old entry from the target directory. */
1015 de = tmpfs_dir_lookup(tdnode, tnode, tncp);
22d3b394 1016 tmpfs_dir_detach(tdnode, de);
80ae59d7 1017 tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
7a2de9a4 1018
dca262fb
MD
1019 /*
1020 * Free the directory entry we just deleted. Note that the
7a2de9a4 1021 * node referred by it will not be removed until the vnode is
dca262fb
MD
1022 * really reclaimed.
1023 */
0786baf1 1024 tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de);
9fc94b5f 1025 /*cache_inval_vp(tvp, CINV_DESTROY);*/
7a2de9a4
MD
1026 }
1027
dca262fb
MD
1028 /*
1029 * Finish up
1030 */
1031 if (newname) {
d00cd01c 1032 kfree(newname, tmp->tm_name_zone);
dca262fb
MD
1033 newname = NULL;
1034 }
7a2de9a4 1035 cache_rename(v->a_fnch, v->a_tnch);
80ae59d7
MD
1036 tmpfs_knote(v->a_fdvp, NOTE_WRITE);
1037 tmpfs_knote(v->a_tdvp, NOTE_WRITE);
1038 if (fnode->tn_vnode)
1039 tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
7a2de9a4
MD
1040 error = 0;
1041
1042out_locked:
630e3a33 1043 ;
7a2de9a4
MD
1044
1045out:
1046 /* Release target nodes. */
1047 /* XXX: I don't understand when tdvp can be the same as tvp, but
1048 * other code takes care of this... */
1049 if (tdvp == tvp)
1050 vrele(tdvp);
7a2de9a4
MD
1051
1052 return error;
1053}
1054
1055/* --------------------------------------------------------------------- */
1056
1057static int
1058tmpfs_nmkdir(struct vop_nmkdir_args *v)
1059{
1060 struct vnode *dvp = v->a_dvp;
1061 struct vnode **vpp = v->a_vpp;
1062 struct namecache *ncp = v->a_nch->ncp;
1063 struct vattr *vap = v->a_vap;
1064 struct ucred *cred = v->a_cred;
1065 int error;
1066
1067 KKASSERT(vap->va_type == VDIR);
1068
7a2de9a4
MD
1069 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1070 if (error == 0) {
1071 cache_setunresolved(v->a_nch);
1072 cache_setvp(v->a_nch, *vpp);
80ae59d7 1073 tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
7a2de9a4 1074 }
7a2de9a4
MD
1075
1076 return error;
1077}
1078
1079/* --------------------------------------------------------------------- */
1080
1081static int
1082tmpfs_nrmdir(struct vop_nrmdir_args *v)
1083{
1084 struct vnode *dvp = v->a_dvp;
1085 struct namecache *ncp = v->a_nch->ncp;
9fc94b5f 1086 struct vnode *vp;
7a2de9a4
MD
1087 struct tmpfs_dirent *de;
1088 struct tmpfs_mount *tmp;
1089 struct tmpfs_node *dnode;
1090 struct tmpfs_node *node;
38e5e604
MD
1091 int error;
1092
1093 /*
9fc94b5f
MD
1094 * We have to acquire the vp from v->a_nch because
1095 * we will likely unresolve the namecache entry, and
1096 * a vrele is needed to trigger the tmpfs_inactive/tmpfs_reclaim
1097 * sequence.
1098 */
1099 error = cache_vref(v->a_nch, v->a_cred, &vp);
1100 KKASSERT(error == 0);
7a2de9a4 1101
e527fb6b
MD
1102 /*
1103 * Prevalidate so we don't hit an assertion later
1104 */
1105 if (vp->v_type != VDIR) {
1106 error = ENOTDIR;
1107 goto out;
1108 }
1109
7a2de9a4
MD
1110 tmp = VFS_TO_TMPFS(dvp->v_mount);
1111 dnode = VP_TO_TMPFS_DIR(dvp);
1112 node = VP_TO_TMPFS_DIR(vp);
1113
1114 /* Directories with more than two entries ('.' and '..') cannot be
1115 * removed. */
1116 if (node->tn_size > 0) {
1117 error = ENOTEMPTY;
1118 goto out;
1119 }
1120
1121 if ((dnode->tn_flags & APPEND)
1122 || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1123 error = EPERM;
1124 goto out;
1125 }
1126
1127 /* This invariant holds only if we are not trying to remove "..".
1128 * We checked for that above so this is safe now. */
1129 KKASSERT(node->tn_dir.tn_parent == dnode);
1130
1131 /* Get the directory entry associated with node (vp). This was
1132 * filled by tmpfs_lookup while looking up the entry. */
1133 de = tmpfs_dir_lookup(dnode, node, ncp);
1134 KKASSERT(TMPFS_DIRENT_MATCHES(de,
1135 ncp->nc_name,
1136 ncp->nc_nlen));
1137
1138 /* Check flags to see if we are allowed to remove the directory. */
b7fe63af
MD
1139 if ((dnode->tn_flags & APPEND) ||
1140 node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
7a2de9a4
MD
1141 error = EPERM;
1142 goto out;
1143 }
1144
1145
1146 /* Detach the directory entry from the directory (dnode). */
22d3b394 1147 tmpfs_dir_detach(dnode, de);
7a2de9a4
MD
1148
1149 /* No vnode should be allocated for this entry from this point */
1150 TMPFS_NODE_LOCK(node);
1151 TMPFS_ASSERT_ELOCKED(node);
1152 TMPFS_NODE_LOCK(dnode);
1153 TMPFS_ASSERT_ELOCKED(dnode);
1154
0786baf1
MD
1155#if 0
1156 /* handled by tmpfs_free_node */
1157 KKASSERT(node->tn_links > 0);
7a2de9a4
MD
1158 node->tn_links--;
1159 node->tn_dir.tn_parent = NULL;
0786baf1 1160#endif
7a2de9a4
MD
1161 node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1162 TMPFS_NODE_MODIFIED;
1163
0786baf1
MD
1164#if 0
1165 /* handled by tmpfs_free_node */
1166 KKASSERT(dnode->tn_links > 0);
7a2de9a4 1167 dnode->tn_links--;
0786baf1 1168#endif
7a2de9a4
MD
1169 dnode->tn_status |= TMPFS_NODE_ACCESSED | \
1170 TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1171
1172 TMPFS_NODE_UNLOCK(dnode);
1173 TMPFS_NODE_UNLOCK(node);
1174
1175 /* Free the directory entry we just deleted. Note that the node
1176 * referred by it will not be removed until the vnode is really
1177 * reclaimed. */
0786baf1 1178 tmpfs_free_dirent(tmp, de);
7a2de9a4
MD
1179
1180 /* Release the deleted vnode (will destroy the node, notify
1181 * interested parties and clean it from the cache). */
1182
1183 TMPFS_NODE_LOCK(dnode);
1184 dnode->tn_status |= TMPFS_NODE_CHANGED;
1185 TMPFS_NODE_UNLOCK(dnode);
1186 tmpfs_update(dvp);
1187
1188 cache_setunresolved(v->a_nch);
1189 cache_setvp(v->a_nch, NULL);
9fc94b5f 1190 /*cache_inval_vp(vp, CINV_DESTROY);*/
80ae59d7 1191 tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
7a2de9a4
MD
1192 error = 0;
1193
1194out:
9fc94b5f 1195 vrele(vp);
7a2de9a4
MD
1196
1197 return error;
1198}
1199
1200/* --------------------------------------------------------------------- */
1201
1202static int
1203tmpfs_nsymlink(struct vop_nsymlink_args *v)
1204{
1205 struct vnode *dvp = v->a_dvp;
1206 struct vnode **vpp = v->a_vpp;
1207 struct namecache *ncp = v->a_nch->ncp;
1208 struct vattr *vap = v->a_vap;
1209 struct ucred *cred = v->a_cred;
1210 char *target = v->a_target;
1211 int error;
1212
7a2de9a4
MD
1213 vap->va_type = VLNK;
1214 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1215 if (error == 0) {
80ae59d7 1216 tmpfs_knote(*vpp, NOTE_WRITE);
7a2de9a4
MD
1217 cache_setunresolved(v->a_nch);
1218 cache_setvp(v->a_nch, *vpp);
1219 }
7a2de9a4
MD
1220
1221 return error;
1222}
1223
1224/* --------------------------------------------------------------------- */
1225
1226static int
1227tmpfs_readdir(struct vop_readdir_args *v)
1228{
1229 struct vnode *vp = v->a_vp;
1230 struct uio *uio = v->a_uio;
1231 int *eofflag = v->a_eofflag;
1232 off_t **cookies = v->a_cookies;
1233 int *ncookies = v->a_ncookies;
22d3b394 1234 struct tmpfs_mount *tmp;
7a2de9a4
MD
1235 int error;
1236 off_t startoff;
1237 off_t cnt = 0;
1238 struct tmpfs_node *node;
1239
1240 /* This operation only makes sense on directory nodes. */
1241 if (vp->v_type != VDIR)
1242 return ENOTDIR;
1243
22d3b394 1244 tmp = VFS_TO_TMPFS(vp->v_mount);
7a2de9a4
MD
1245 node = VP_TO_TMPFS_DIR(vp);
1246 startoff = uio->uio_offset;
1247
1248 if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1249 error = tmpfs_dir_getdotdent(node, uio);
1250 if (error != 0)
1251 goto outok;
1252 cnt++;
1253 }
1254
1255 if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
22d3b394 1256 error = tmpfs_dir_getdotdotdent(tmp, node, uio);
7a2de9a4
MD
1257 if (error != 0)
1258 goto outok;
1259 cnt++;
1260 }
1261
1262 error = tmpfs_dir_getdents(node, uio, &cnt);
1263
1264outok:
1265 KKASSERT(error >= -1);
1266
1267 if (error == -1)
1268 error = 0;
1269
1270 if (eofflag != NULL)
1271 *eofflag =
1272 (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1273
1274 /* Update NFS-related variables. */
1275 if (error == 0 && cookies != NULL && ncookies != NULL) {
1276 off_t i;
1277 off_t off = startoff;
1278 struct tmpfs_dirent *de = NULL;
1279
1280 *ncookies = cnt;
1281 *cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1282
1283 for (i = 0; i < cnt; i++) {
1284 KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
1285 if (off == TMPFS_DIRCOOKIE_DOT) {
1286 off = TMPFS_DIRCOOKIE_DOTDOT;
1287 } else {
1288 if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1289 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
1290 } else if (de != NULL) {
1291 de = TAILQ_NEXT(de, td_entries);
1292 } else {
1293 de = tmpfs_dir_lookupbycookie(node,
1294 off);
1295 KKASSERT(de != NULL);
1296 de = TAILQ_NEXT(de, td_entries);
1297 }
1298 if (de == NULL)
1299 off = TMPFS_DIRCOOKIE_EOF;
1300 else
1301 off = tmpfs_dircookie(de);
1302 }
1303
1304 (*cookies)[i] = off;
1305 }
1306 KKASSERT(uio->uio_offset == off);
1307 }
7a2de9a4
MD
1308
1309 return error;
1310}
1311
1312/* --------------------------------------------------------------------- */
1313
1314static int
1315tmpfs_readlink(struct vop_readlink_args *v)
1316{
1317 struct vnode *vp = v->a_vp;
1318 struct uio *uio = v->a_uio;
1319
1320 int error;
1321 struct tmpfs_node *node;
1322
1323 KKASSERT(uio->uio_offset == 0);
1324 KKASSERT(vp->v_type == VLNK);
1325
1326 node = VP_TO_TMPFS_NODE(vp);
1327
1328 error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1329 uio);
1330 TMPFS_NODE_LOCK(node);
1331 node->tn_status |= TMPFS_NODE_ACCESSED;
1332 TMPFS_NODE_UNLOCK(node);
1333
1334 return error;
1335}
1336
1337/* --------------------------------------------------------------------- */
1338
1339static int
1340tmpfs_inactive(struct vop_inactive_args *v)
1341{
1342 struct vnode *vp = v->a_vp;
1343
1344 struct tmpfs_node *node;
1345
7a2de9a4
MD
1346 node = VP_TO_TMPFS_NODE(vp);
1347
9fc94b5f
MD
1348 /*
1349 * Get rid of unreferenced deleted vnodes sooner rather than
1350 * later so the data memory can be recovered immediately.
f96f2f39
MD
1351 *
1352 * We must truncate the vnode to prevent the normal reclamation
1353 * path from flushing the data for the removed file to disk.
9fc94b5f 1354 */
7a2de9a4 1355 TMPFS_NODE_LOCK(node);
b7fe63af
MD
1356 if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1357 (node->tn_links == 0 ||
1358 (node->tn_links == 1 && node->tn_type == VDIR &&
1359 node->tn_dir.tn_parent)))
1360 {
9fc94b5f 1361 node->tn_vpstate = TMPFS_VNODE_DOOMED;
7a2de9a4 1362 TMPFS_NODE_UNLOCK(node);
f96f2f39
MD
1363 if (node->tn_type == VREG)
1364 tmpfs_truncate(vp, 0);
7a2de9a4 1365 vrecycle(vp);
9fc94b5f 1366 } else {
7a2de9a4 1367 TMPFS_NODE_UNLOCK(node);
9fc94b5f 1368 }
7a2de9a4
MD
1369
1370 return 0;
1371}
1372
1373/* --------------------------------------------------------------------- */
1374
1375int
1376tmpfs_reclaim(struct vop_reclaim_args *v)
1377{
1378 struct vnode *vp = v->a_vp;
7a2de9a4
MD
1379 struct tmpfs_mount *tmp;
1380 struct tmpfs_node *node;
1381
1382 node = VP_TO_TMPFS_NODE(vp);
1383 tmp = VFS_TO_TMPFS(vp->v_mount);
1384
7a2de9a4
MD
1385 tmpfs_free_vp(vp);
1386
b7fe63af
MD
1387 /*
1388 * If the node referenced by this vnode was deleted by the
1389 * user, we must free its associated data structures now that
1390 * the vnode is being reclaimed.
1391 *
1392 * Directories have an extra link ref.
1393 */
7a2de9a4 1394 TMPFS_NODE_LOCK(node);
b7fe63af
MD
1395 if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1396 (node->tn_links == 0 ||
1397 (node->tn_links == 1 && node->tn_type == VDIR &&
1398 node->tn_dir.tn_parent)))
1399 {
7a2de9a4 1400 node->tn_vpstate = TMPFS_VNODE_DOOMED;
7a2de9a4 1401 tmpfs_free_node(tmp, node);
0786baf1 1402 /* eats the lock */
9fc94b5f 1403 } else {
7a2de9a4 1404 TMPFS_NODE_UNLOCK(node);
9fc94b5f 1405 }
7a2de9a4
MD
1406
1407 KKASSERT(vp->v_data == NULL);
1408 return 0;
1409}
1410
1411/* --------------------------------------------------------------------- */
1412
1413static int
1414tmpfs_print(struct vop_print_args *v)
1415{
1416 struct vnode *vp = v->a_vp;
1417
1418 struct tmpfs_node *node;
1419
1420 node = VP_TO_TMPFS_NODE(vp);
1421
1422 kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1423 node, node->tn_flags, node->tn_links);
1424 kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1425 node->tn_mode, node->tn_uid, node->tn_gid,
1426 (uintmax_t)node->tn_size, node->tn_status);
1427
1428 if (vp->v_type == VFIFO)
1429 fifo_printinfo(vp);
1430
1431 kprintf("\n");
1432
1433 return 0;
1434}
1435
1436/* --------------------------------------------------------------------- */
1437
1438static int
1439tmpfs_pathconf(struct vop_pathconf_args *v)
1440{
1441 int name = v->a_name;
1442 register_t *retval = v->a_retval;
1443
1444 int error;
1445
1446 error = 0;
1447
1448 switch (name) {
1449 case _PC_LINK_MAX:
1450 *retval = LINK_MAX;
1451 break;
1452
1453 case _PC_NAME_MAX:
1454 *retval = NAME_MAX;
1455 break;
1456
1457 case _PC_PATH_MAX:
1458 *retval = PATH_MAX;
1459 break;
1460
1461 case _PC_PIPE_BUF:
1462 *retval = PIPE_BUF;
1463 break;
1464
1465 case _PC_CHOWN_RESTRICTED:
1466 *retval = 1;
1467 break;
1468
1469 case _PC_NO_TRUNC:
1470 *retval = 1;
1471 break;
1472
1473 case _PC_SYNC_IO:
1474 *retval = 1;
1475 break;
1476
1477 case _PC_FILESIZEBITS:
1478 *retval = 0; /* XXX Don't know which value should I return. */
1479 break;
1480
1481 default:
1482 error = EINVAL;
1483 }
1484
1485 return error;
1486}
1487
80ae59d7
MD
1488/************************************************************************
1489 * KQFILTER OPS *
1490 ************************************************************************/
1491
1492static void filt_tmpfsdetach(struct knote *kn);
1493static int filt_tmpfsread(struct knote *kn, long hint);
1494static int filt_tmpfswrite(struct knote *kn, long hint);
1495static int filt_tmpfsvnode(struct knote *kn, long hint);
1496
1497static struct filterops tmpfsread_filtops =
1498 { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsread };
1499static struct filterops tmpfswrite_filtops =
1500 { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfswrite };
1501static struct filterops tmpfsvnode_filtops =
1502 { FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1503
1504static int
1505tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1506{
1507 struct vnode *vp = ap->a_vp;
1508 struct knote *kn = ap->a_kn;
1509
1510 switch (kn->kn_filter) {
1511 case EVFILT_READ:
1512 kn->kn_fop = &tmpfsread_filtops;
1513 break;
1514 case EVFILT_WRITE:
1515 kn->kn_fop = &tmpfswrite_filtops;
1516 break;
1517 case EVFILT_VNODE:
1518 kn->kn_fop = &tmpfsvnode_filtops;
1519 break;
1520 default:
1521 return (EOPNOTSUPP);
1522 }
1523
1524 kn->kn_hook = (caddr_t)vp;
1525
1526 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1527
1528 return(0);
1529}
1530
1531static void
1532filt_tmpfsdetach(struct knote *kn)
1533{
1534 struct vnode *vp = (void *)kn->kn_hook;
1535
1536 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1537}
1538
1539static int
1540filt_tmpfsread(struct knote *kn, long hint)
1541{
1542 struct vnode *vp = (void *)kn->kn_hook;
1543 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1544 off_t off;
1545
1546 if (hint == NOTE_REVOKE) {
1547 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1548 return(1);
1549 }
1550 off = node->tn_size - kn->kn_fp->f_offset;
1551 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1552 if (kn->kn_sfflags & NOTE_OLDAPI)
1553 return(1);
1554
1555 /*
1556 * Handle possible MP race interlock on filter check/write
1557 */
1558 if (kn->kn_data == 0) {
1559 get_mplock();
1560 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1561 rel_mplock();
1562 }
1563 return (kn->kn_data != 0);
1564}
1565
1566static int
1567filt_tmpfswrite(struct knote *kn, long hint)
1568{
1569 if (hint == NOTE_REVOKE)
1570 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1571 kn->kn_data = 0;
1572 return (1);
1573}
1574
1575static int
1576filt_tmpfsvnode(struct knote *kn, long hint)
1577{
1578 if (kn->kn_sfflags & hint)
1579 kn->kn_fflags |= hint;
1580 if (hint == NOTE_REVOKE) {
1581 kn->kn_flags |= EV_EOF;
1582 return (1);
1583 }
1584 return (kn->kn_fflags != 0);
1585}
1586
1587
7a2de9a4
MD
1588/* --------------------------------------------------------------------- */
1589
1590/*
1591 * vnode operations vector used for files stored in a tmpfs file system.
1592 */
1593struct vop_ops tmpfs_vnode_vops = {
1594 .vop_default = vop_defaultop,
1595 .vop_getpages = vop_stdgetpages,
1596 .vop_putpages = vop_stdputpages,
1597 .vop_ncreate = tmpfs_ncreate,
1598 .vop_nresolve = tmpfs_nresolve,
1599 .vop_nlookupdotdot = tmpfs_nlookupdotdot,
1600 .vop_nmknod = tmpfs_nmknod,
1601 .vop_open = tmpfs_open,
1602 .vop_close = tmpfs_close,
1603 .vop_access = tmpfs_access,
1604 .vop_getattr = tmpfs_getattr,
1605 .vop_setattr = tmpfs_setattr,
1606 .vop_read = tmpfs_read,
1607 .vop_write = tmpfs_write,
1608 .vop_fsync = tmpfs_fsync,
1609 .vop_nremove = tmpfs_nremove,
1610 .vop_nlink = tmpfs_nlink,
1611 .vop_nrename = tmpfs_nrename,
1612 .vop_nmkdir = tmpfs_nmkdir,
1613 .vop_nrmdir = tmpfs_nrmdir,
1614 .vop_nsymlink = tmpfs_nsymlink,
1615 .vop_readdir = tmpfs_readdir,
1616 .vop_readlink = tmpfs_readlink,
1617 .vop_inactive = tmpfs_inactive,
1618 .vop_reclaim = tmpfs_reclaim,
1619 .vop_print = tmpfs_print,
1620 .vop_pathconf = tmpfs_pathconf,
9fc94b5f 1621 .vop_bmap = tmpfs_bmap,
7a2de9a4
MD
1622 .vop_strategy = tmpfs_strategy,
1623 .vop_advlock = tmpfs_advlock,
80ae59d7 1624 .vop_kqfilter = tmpfs_kqfilter
7a2de9a4 1625};