proc->thread stage 3.5: Add an IO_CORE flag so coda doesn't have to dig
[dragonfly.git] / sys / kern / vfs_syscalls.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.3 2003/06/23 17:55:41 dillon Exp $
41 */
42
43/* For 4.3 integer FS ID compatibility */
44#include "opt_compat.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/buf.h>
49#include <sys/sysent.h>
50#include <sys/malloc.h>
51#include <sys/mount.h>
52#include <sys/sysproto.h>
53#include <sys/namei.h>
54#include <sys/filedesc.h>
55#include <sys/kernel.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/linker.h>
59#include <sys/stat.h>
60#include <sys/unistd.h>
61#include <sys/vnode.h>
62#include <sys/proc.h>
63#include <sys/dirent.h>
64#include <sys/extattr.h>
65
66#include <machine/limits.h>
67#include <miscfs/union/union.h>
68#include <sys/sysctl.h>
69#include <vm/vm.h>
70#include <vm/vm_object.h>
71#include <vm/vm_zone.h>
72#include <vm/vm_page.h>
73
/*
 * Forward declarations for helpers that are private to this file.
 */
74static int change_dir __P((struct nameidata *ndp, struct proc *p));
75static void checkdirs __P((struct vnode *olddp));
76static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
77static int getutimes __P((const struct timeval *, struct timespec *));
78static int setfown __P((struct vnode *, uid_t, gid_t));
79static int setfmode __P((struct vnode *, int));
80static int setfflags __P((struct vnode *, int));
81static int setutimes __P((struct vnode *, const struct timespec *, int));
/* Consulted by mount(): when zero, mount() requires suser() to succeed. */
82static int usermount = 0; /* if 1, non-root can mount fs. */
83
/*
 * Globally visible function pointer; nothing in this part of the file
 * assigns it.  NOTE(review): presumably installed by the union filesystem
 * code -- confirm against its users.
 */
84int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
85
/* Export usermount as a read-write integer sysctl under the _vfs node. */
86SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
87
88/*
89 * Virtual File System System Calls
90 */
91
92/*
93 * Mount a file system.
94 */
95#ifndef _SYS_SYSPROTO_H_
96struct mount_args {
97 char *type;
98 char *path;
99 int flags;
100 caddr_t data;
101};
102#endif
103/*
104 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Mount a filesystem of the given type on the directory named by path,
 * or, when MNT_UPDATE is set in flags, update the mount whose root vnode
 * path resolves to.
 *
 * Permission rules visible here:
 *  - when the usermount sysctl is zero, suser() must succeed;
 *  - MNT_EXPORTED always requires suser() to succeed;
 *  - callers failing suser_xxx() get MNT_NOSUID | MNT_NODEV forced on;
 *  - an update requires the caller to be the recorded mount owner or
 *    pass suser(); a new mount requires the caller to own the covered
 *    directory or pass suser().
 *
 * Returns 0 on success or an errno value (EINVAL, EOPNOTSUPP, EBUSY,
 * ENOTDIR, ENODEV, or whatever the called routines return).
105 */
106/* ARGSUSED */
107int
108mount(struct mount_args *uap)
109{
110 struct proc *p = curproc;
111 struct vnode *vp;
112 struct mount *mp;
113 struct vfsconf *vfsp;
/* flag/flag2 snapshot mnt_flag/mnt_kern_flag so a failed update can roll back */
114 int error, flag = 0, flag2 = 0;
115 struct vattr va;
116#ifdef COMPAT_43
117 u_long fstypenum;
118#endif
119 struct nameidata nd;
120 char fstypename[MFSNAMELEN];
121
122 if (usermount == 0 && (error = suser()))
123 return (error);
124 /*
125 * Do not allow NFS export by non-root users.
126 */
127 if (SCARG(uap, flags) & MNT_EXPORTED) {
128 error = suser();
129 if (error)
130 return (error);
131 }
132 /*
133 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
134 */
135 if (suser_xxx(p->p_ucred, 0))
136 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
137 /*
138 * Get vnode to be covered
139 */
140 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
141 SCARG(uap, path), p);
142 if ((error = namei(&nd)) != 0)
143 return (error);
144 NDFREE(&nd, NDF_ONLY_PNBUF);
145 vp = nd.ni_vp;
/*
 * Update of an existing mount: validate, mark the vnode VMOUNT, merge
 * the request flags into mnt_flag and jump to the shared "update" code.
 */
146 if (SCARG(uap, flags) & MNT_UPDATE) {
147 if ((vp->v_flag & VROOT) == 0) {
148 vput(vp);
149 return (EINVAL);
150 }
151 mp = vp->v_mount;
152 flag = mp->mnt_flag;
153 flag2 = mp->mnt_kern_flag;
154 /*
155 * We only allow the filesystem to be reloaded if it
156 * is currently mounted read-only.
157 */
158 if ((SCARG(uap, flags) & MNT_RELOAD) &&
159 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
160 vput(vp);
161 return (EOPNOTSUPP); /* Needs translation */
162 }
163 /*
164 * Only root, or the user that did the original mount is
165 * permitted to update it.
166 */
167 if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
168 (error = suser())) {
169 vput(vp);
170 return (error);
171 }
172 if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
173 vput(vp);
174 return (EBUSY);
175 }
/* VMOUNT already set or something mounted here: refuse with EBUSY. */
176 simple_lock(&vp->v_interlock);
177 if ((vp->v_flag & VMOUNT) != 0 ||
178 vp->v_mountedhere != NULL) {
179 simple_unlock(&vp->v_interlock);
180 vfs_unbusy(mp, p);
181 vput(vp);
182 return (EBUSY);
183 }
184 vp->v_flag |= VMOUNT;
185 simple_unlock(&vp->v_interlock);
186 mp->mnt_flag |=
187 SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
188 VOP_UNLOCK(vp, 0, p);
189 goto update;
190 }
191 /*
192 * If the user is not root, ensure that they own the directory
193 * onto which we are attempting to mount.
194 */
195 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
196 (va.va_uid != p->p_ucred->cr_uid &&
197 (error = suser()))) {
198 vput(vp);
199 return (error);
200 }
201 if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
202 vput(vp);
203 return (error);
204 }
205 if (vp->v_type != VDIR) {
206 vput(vp);
207 return (ENOTDIR);
208 }
209#ifdef COMPAT_43
210 /*
211 * Historically filesystem types were identified by number. If we
212 * get an integer for the filesystem type instead of a string, we
213 * check to see if it matches one of the historic filesystem types.
214 */
215 fstypenum = (uintptr_t)SCARG(uap, type);
216 if (fstypenum < maxvfsconf) {
217 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
218 if (vfsp->vfc_typenum == fstypenum)
219 break;
220 if (vfsp == NULL) {
221 vput(vp);
222 return (ENODEV);
223 }
224 strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN);
225 } else
226#endif /* COMPAT_43 */
/*
 * With COMPAT_43 defined the following "if" is the else-branch of the
 * numeric-type test above; otherwise it runs unconditionally.
 */
227 if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) {
228 vput(vp);
229 return (error);
230 }
/* Look the filesystem type up by name in the vfsconf list. */
231 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
232 if (!strcmp(vfsp->vfc_name, fstypename))
233 break;
/* Unknown type: try loading a module of that name, then search again. */
234 if (vfsp == NULL) {
235 linker_file_t lf;
236
237 /* Only load modules for root (very important!) */
238 if ((error = suser()) != 0) {
239 vput(vp);
240 return error;
241 }
242 error = linker_load_file(fstypename, &lf);
243 if (error || lf == NULL) {
244 vput(vp);
245 if (lf == NULL)
246 error = ENODEV;
247 return error;
248 }
249 lf->userrefs++;
250 /* lookup again, see if the VFS was loaded */
251 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
252 if (!strcmp(vfsp->vfc_name, fstypename))
253 break;
254 if (vfsp == NULL) {
255 lf->userrefs--;
256 linker_file_unload(lf);
257 vput(vp);
258 return (ENODEV);
259 }
260 }
/* Same VMOUNT / v_mountedhere check as on the update path. */
261 simple_lock(&vp->v_interlock);
262 if ((vp->v_flag & VMOUNT) != 0 ||
263 vp->v_mountedhere != NULL) {
264 simple_unlock(&vp->v_interlock);
265 vput(vp);
266 return (EBUSY);
267 }
268 vp->v_flag |= VMOUNT;
269 simple_unlock(&vp->v_interlock);
270
271 /*
272 * Allocate and initialize the filesystem.
273 */
274 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
275 bzero((char *)mp, (u_long)sizeof(struct mount));
276 TAILQ_INIT(&mp->mnt_nvnodelist);
277 TAILQ_INIT(&mp->mnt_reservedvnlist);
278 mp->mnt_nvnodelistsize = 0;
279 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
/* The vfs_busy() result is deliberately discarded here. */
280 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
281 mp->mnt_op = vfsp->vfc_vfsops;
282 mp->mnt_vfc = vfsp;
283 vfsp->vfc_refcount++;
284 mp->mnt_stat.f_type = vfsp->vfc_typenum;
285 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
286 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
287 mp->mnt_vnodecovered = vp;
288 mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
289 mp->mnt_iosize_max = DFLTPHYS;
290 VOP_UNLOCK(vp, 0, p);
/* Shared by the new-mount path (falls through) and the MNT_UPDATE path. */
291update:
292 /*
293 * Set the mount level flags.
294 */
295 if (SCARG(uap, flags) & MNT_RDONLY)
296 mp->mnt_flag |= MNT_RDONLY;
297 else if (mp->mnt_flag & MNT_RDONLY)
298 mp->mnt_kern_flag |= MNTK_WANTRDWR;
/* Clear the user-settable option bits, then copy them in from the request. */
299 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
300 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
301 MNT_NOSYMFOLLOW | MNT_IGNORE |
302 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
303 mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
304 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
305 MNT_NOSYMFOLLOW | MNT_IGNORE |
306 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
307 /*
308 * Mount the filesystem.
309 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
310 * get. No freeing of cn_pnbuf.
311 */
312 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
/*
 * Update path: drop the transient flags, roll mnt_flag/mnt_kern_flag
 * back to the saved values on failure, bring the syncer vnode in line
 * with the resulting read-only state, and return.
 */
313 if (mp->mnt_flag & MNT_UPDATE) {
314 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
315 mp->mnt_flag &= ~MNT_RDONLY;
316 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
317 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
318 if (error) {
319 mp->mnt_flag = flag;
320 mp->mnt_kern_flag = flag2;
321 }
/*
 * NOTE(review): when this branch is taken the VFS_MOUNT() result held in
 * "error" is replaced by the vfs_allocate_syncvnode() result -- confirm
 * that is intended.
 */
322 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
323 if (mp->mnt_syncer == NULL)
324 error = vfs_allocate_syncvnode(mp);
325 } else {
326 if (mp->mnt_syncer != NULL)
327 vrele(mp->mnt_syncer);
328 mp->mnt_syncer = NULL;
329 }
330 vfs_unbusy(mp, p);
331 simple_lock(&vp->v_interlock);
332 vp->v_flag &= ~VMOUNT;
333 simple_unlock(&vp->v_interlock);
334 vrele(vp);
335 return (error);
336 }
/* New mount: re-lock the covered vnode before publishing the mount. */
337 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
338 /*
339 * Put the new filesystem on the mount list after root.
340 */
341 cache_purge(vp);
342 if (!error) {
343 simple_lock(&vp->v_interlock);
344 vp->v_flag &= ~VMOUNT;
345 vp->v_mountedhere = mp;
346 simple_unlock(&vp->v_interlock);
347 simple_lock(&mountlist_slock);
348 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
349 simple_unlock(&mountlist_slock);
350 checkdirs(vp);
351 VOP_UNLOCK(vp, 0, p);
/*
 * NOTE(review): the value assigned to "error" here is overwritten by the
 * VFS_START() assignment two statements below, so a syncer allocation
 * failure is not reported -- confirm that is intended.
 */
352 if ((mp->mnt_flag & MNT_RDONLY) == 0)
353 error = vfs_allocate_syncvnode(mp);
354 vfs_unbusy(mp, p);
355 if ((error = VFS_START(mp, 0, p)) != 0)
356 vrele(vp);
357 } else {
/* VFS_MOUNT() failed: undo VMOUNT, the type refcount and the allocation. */
358 simple_lock(&vp->v_interlock);
359 vp->v_flag &= ~VMOUNT;
360 simple_unlock(&vp->v_interlock);
361 mp->mnt_vfc->vfc_refcount--;
362 vfs_unbusy(mp, p);
363 free((caddr_t)mp, M_MOUNT);
364 vput(vp);
365 }
366 return (error);
367}
368
369/*
370 * Scan all active processes to see if any of them have a current
371 * or root directory onto which the new filesystem has just been
372 * mounted. If so, replace them with the new mount point.
373 */
374static void
375checkdirs(olddp)
376 struct vnode *olddp;
377{
378 struct filedesc *fdp;
379 struct vnode *newdp;
380 struct proc *p;
381
382 if (olddp->v_usecount == 1)
383 return;
384 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
385 panic("mount: lost mount");
386 LIST_FOREACH(p, &allproc, p_list) {
387 fdp = p->p_fd;
388 if (fdp->fd_cdir == olddp) {
389 vrele(fdp->fd_cdir);
390 VREF(newdp);
391 fdp->fd_cdir = newdp;
392 }
393 if (fdp->fd_rdir == olddp) {
394 vrele(fdp->fd_rdir);
395 VREF(newdp);
396 fdp->fd_rdir = newdp;
397 }
398 }
399 if (rootvnode == olddp) {
400 vrele(rootvnode);
401 VREF(newdp);
402 rootvnode = newdp;
403 }
404 vput(newdp);
405}
406
407/*
408 * Unmount a file system.
409 *
410 * Note: unmount takes a path to the vnode mounted on as argument,
411 * not special file (as before).
412 */
413#ifndef _SYS_SYSPROTO_H_
414struct unmount_args {
415 char *path;
416 int flags;
417};
418#endif
419/*
420 * umount_args(char *path, int flags)
421 */
422/* ARGSUSED */
423int
424unmount(struct unmount_args *uap)
425{
426 struct proc *p = curproc;
427 struct vnode *vp;
428 struct mount *mp;
429 int error;
430 struct nameidata nd;
431
432 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
433 SCARG(uap, path), p);
434 if ((error = namei(&nd)) != 0)
435 return (error);
436 vp = nd.ni_vp;
437 NDFREE(&nd, NDF_ONLY_PNBUF);
438 mp = vp->v_mount;
439
440 /*
441 * Only root, or the user that did the original mount is
442 * permitted to unmount this filesystem.
443 */
444 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
445 (error = suser())) {
446 vput(vp);
447 return (error);
448 }
449
450 /*
451 * Don't allow unmounting the root file system.
452 */
453 if (mp->mnt_flag & MNT_ROOTFS) {
454 vput(vp);
455 return (EINVAL);
456 }
457
458 /*
459 * Must be the root of the filesystem
460 */
461 if ((vp->v_flag & VROOT) == 0) {
462 vput(vp);
463 return (EINVAL);
464 }
465 vput(vp);
466 return (dounmount(mp, SCARG(uap, flags), p));
467}
468
469/*
470 * Do the actual file system unmount.
 *
 * mp    - mount to tear down
 * flags - unmount flags; MNT_FORCE is the only one examined here, the
 *         rest are passed through to VFS_UNMOUNT()
 * p     - calling process, used for locking and credentials
 *
 * Returns EBUSY if MNTK_UNMOUNT is already set on mp, the lockmgr(),
 * VFS_SYNC() or VFS_UNMOUNT() error on failure, and 0 after the mount
 * structure has been unlinked from mountlist and freed.
471 */
472int
473dounmount(mp, flags, p)
474 register struct mount *mp;
475 int flags;
476 struct proc *p;
477{
478 struct vnode *coveredvp;
479 int error;
480 int async_flag;
481
/* MNTK_UNMOUNT already set means another unmount is under way. */
482 simple_lock(&mountlist_slock);
483 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
484 simple_unlock(&mountlist_slock);
485 return (EBUSY);
486 }
487 mp->mnt_kern_flag |= MNTK_UNMOUNT;
488 /* Allow filesystems to detect that a forced unmount is in progress. */
489 if (flags & MNT_FORCE)
490 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
/*
 * Drain the mount lock, passing mountlist_slock as the interlock.  A
 * non-forced unmount adds LK_NOWAIT; a forced one does not.
 */
491 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
492 ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_slock, p);
493 if (error) {
494 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
495 if (mp->mnt_kern_flag & MNTK_MWAIT)
496 wakeup((caddr_t)mp);
497 return (error);
498 }
499
500 if (mp->mnt_flag & MNT_EXPUBLIC)
501 vfs_setpublicfs(NULL, NULL, NULL);
502
503 vfs_msync(mp, MNT_WAIT);
/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
504 async_flag = mp->mnt_flag & MNT_ASYNC;
505 mp->mnt_flag &=~ MNT_ASYNC;
506 cache_purgevfs(mp); /* remove cache entries for this file sys */
507 if (mp->mnt_syncer != NULL)
508 vrele(mp->mnt_syncer);
/*
 * VFS_UNMOUNT() is attempted when the filesystem is read-only, when
 * VFS_SYNC() succeeded, or when the unmount is forced.  Otherwise the
 * VFS_SYNC() error is what gets reported below.
 */
509 if (((mp->mnt_flag & MNT_RDONLY) ||
510 (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
511 (flags & MNT_FORCE))
512 error = VFS_UNMOUNT(mp, flags, p);
513 simple_lock(&mountlist_slock);
514 if (error) {
/*
 * Failure: put things back.  NOTE(review): the mnt_syncer == NULL test
 * assumes the vrele() above caused mnt_syncer to be cleared elsewhere --
 * confirm.
 */
515 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
516 (void) vfs_allocate_syncvnode(mp);
517 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
518 mp->mnt_flag |= async_flag;
519 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
520 &mountlist_slock, p);
521 if (mp->mnt_kern_flag & MNTK_MWAIT)
522 wakeup((caddr_t)mp);
523 return (error);
524 }
/* Success: unlink the mount and detach it from the covered vnode. */
525 TAILQ_REMOVE(&mountlist, mp, mnt_list);
526 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
527 coveredvp->v_mountedhere = (struct mount *)0;
528 vrele(coveredvp);
529 }
530 mp->mnt_vfc->vfc_refcount--;
531 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
532 panic("unmount: dangling vnode");
533 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p);
/* Wake anything sleeping on mp before the structure is freed. */
534 if (mp->mnt_kern_flag & MNTK_MWAIT)
535 wakeup((caddr_t)mp);
536 free((caddr_t)mp, M_MOUNT);
537 return (0);
538}
539
540/*
541 * Sync each mounted filesystem.
542 */
543#ifndef _SYS_SYSPROTO_H_
544struct sync_args {
545 int dummy;
546};
547#endif
548
549#ifdef DEBUG
550static int syncprt = 0;
551SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
552#endif
553
554/* ARGSUSED */
555int
556sync(struct sync_args *uap)
557{
558 struct proc *p = curproc;
559 struct mount *mp, *nmp;
560 int asyncflag;
561
562 simple_lock(&mountlist_slock);
563 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
564 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
565 nmp = TAILQ_NEXT(mp, mnt_list);
566 continue;
567 }
568 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
569 asyncflag = mp->mnt_flag & MNT_ASYNC;
570 mp->mnt_flag &= ~MNT_ASYNC;
571 vfs_msync(mp, MNT_NOWAIT);
572 VFS_SYNC(mp, MNT_NOWAIT,
573 ((p != NULL) ? p->p_ucred : NOCRED), p);
574 mp->mnt_flag |= asyncflag;
575 }
576 simple_lock(&mountlist_slock);
577 nmp = TAILQ_NEXT(mp, mnt_list);
578 vfs_unbusy(mp, p);
579 }
580 simple_unlock(&mountlist_slock);
581#if 0
582/*
583 * XXX don't call vfs_bufstats() yet because that routine
584 * was not imported in the Lite2 merge.
585 */
586#ifdef DIAGNOSTIC
587 if (syncprt)
588 vfs_bufstats();
589#endif /* DIAGNOSTIC */
590#endif
591 return (0);
592}
593
594/* XXX PRISON: could be per prison flag */
595static int prison_quotas;
596#if 0
597SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
598#endif
599
600/*
601 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
602 *
603 * Change filesystem quotas.
604 */
605/* ARGSUSED */
606int
607quotactl(struct quotactl_args *uap)
608{
609 struct proc *p = curproc;
610 struct mount *mp;
611 int error;
612 struct nameidata nd;
613
614 if (p->p_ucred->cr_prison && !prison_quotas)
615 return (EPERM);
616 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
617 if ((error = namei(&nd)) != 0)
618 return (error);
619 mp = nd.ni_vp->v_mount;
620 NDFREE(&nd, NDF_ONLY_PNBUF);
621 vrele(nd.ni_vp);
622 return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
623 SCARG(uap, arg), p));
624}
625
626/*
627 * statfs_args(char *path, struct statfs *buf)
628 *
629 * Get filesystem statistics.
630 */
631/* ARGSUSED */
632int
633statfs(struct statfs_args *uap)
634{
635 struct proc *p = curproc;
636 struct mount *mp;
637 struct statfs *sp;
638 int error;
639 struct nameidata nd;
640 struct statfs sb;
641
642 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
643 if ((error = namei(&nd)) != 0)
644 return (error);
645 mp = nd.ni_vp->v_mount;
646 sp = &mp->mnt_stat;
647 NDFREE(&nd, NDF_ONLY_PNBUF);
648 vrele(nd.ni_vp);
649 error = VFS_STATFS(mp, sp, p);
650 if (error)
651 return (error);
652 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
653 if (suser_xxx(p->p_ucred, 0)) {
654 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
655 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
656 sp = &sb;
657 }
658 return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
659}
660
661/*
662 * fstatfs_args(int fd, struct statfs *buf)
663 *
664 * Get filesystem statistics.
665 */
666/* ARGSUSED */
667int
668fstatfs(struct fstatfs_args *uap)
669{
670 struct proc *p = curproc;
671 struct file *fp;
672 struct mount *mp;
673 register struct statfs *sp;
674 int error;
675 struct statfs sb;
676
677 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
678 return (error);
679 mp = ((struct vnode *)fp->f_data)->v_mount;
680 if (mp == NULL)
681 return (EBADF);
682 sp = &mp->mnt_stat;
683 error = VFS_STATFS(mp, sp, p);
684 if (error)
685 return (error);
686 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
687 if (suser_xxx(p->p_ucred, 0)) {
688 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
689 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
690 sp = &sb;
691 }
692 return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
693}
694
695/*
696 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
697 *
698 * Get statistics on all filesystems.
699 */
700/* ARGSUSED */
701int
702getfsstat(struct getfsstat_args *uap)
703{
704 struct proc *p = curproc;
705 struct mount *mp, *nmp;
706 struct statfs *sp;
707 caddr_t sfsp;
708 long count, maxcount, error;
709
710 maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
711 sfsp = (caddr_t)SCARG(uap, buf);
712 count = 0;
713 simple_lock(&mountlist_slock);
714 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
715 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
716 nmp = TAILQ_NEXT(mp, mnt_list);
717 continue;
718 }
719 if (sfsp && count < maxcount) {
720 sp = &mp->mnt_stat;
721 /*
722 * If MNT_NOWAIT or MNT_LAZY is specified, do not
723 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
724 * overrides MNT_WAIT.
725 */
726 if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
727 (SCARG(uap, flags) & MNT_WAIT)) &&
728 (error = VFS_STATFS(mp, sp, p))) {
729 simple_lock(&mountlist_slock);
730 nmp = TAILQ_NEXT(mp, mnt_list);
731 vfs_unbusy(mp, p);
732 continue;
733 }
734 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
735 error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
736 if (error) {
737 vfs_unbusy(mp, p);
738 return (error);
739 }
740 sfsp += sizeof(*sp);
741 }
742 count++;
743 simple_lock(&mountlist_slock);
744 nmp = TAILQ_NEXT(mp, mnt_list);
745 vfs_unbusy(mp, p);
746 }
747 simple_unlock(&mountlist_slock);
748 if (sfsp && count > maxcount)
749 p->p_retval[0] = maxcount;
750 else
751 p->p_retval[0] = count;
752 return (0);
753}
754
755/*
756 * fchdir_args(int fd)
757 *
758 * Change current working directory to a given file descriptor.
759 */
760/* ARGSUSED */
761int
762fchdir(struct fchdir_args *uap)
763{
764 struct proc *p = curproc;
765 struct filedesc *fdp = p->p_fd;
766 struct vnode *vp, *tdp;
767 struct mount *mp;
768 struct file *fp;
769 int error;
770
771 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
772 return (error);
773 vp = (struct vnode *)fp->f_data;
774 VREF(vp);
775 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
776 if (vp->v_type != VDIR)
777 error = ENOTDIR;
778 else
779 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
780 while (!error && (mp = vp->v_mountedhere) != NULL) {
781 if (vfs_busy(mp, 0, 0, p))
782 continue;
783 error = VFS_ROOT(mp, &tdp);
784 vfs_unbusy(mp, p);
785 if (error)
786 break;
787 vput(vp);
788 vp = tdp;
789 }
790 if (error) {
791 vput(vp);
792 return (error);
793 }
794 VOP_UNLOCK(vp, 0, p);
795 vrele(fdp->fd_cdir);
796 fdp->fd_cdir = vp;
797 return (0);
798}
799
800/*
801 * chdir_args(char *path)
802 *
803 * Change current working directory (``.'').
804 */
805/* ARGSUSED */
806int
807chdir(struct chdir_args *uap)
808{
809 struct proc *p = curproc;
810 struct filedesc *fdp = p->p_fd;
811 int error;
812 struct nameidata nd;
813
814 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
815 SCARG(uap, path), p);
816 if ((error = change_dir(&nd, p)) != 0)
817 return (error);
818 NDFREE(&nd, NDF_ONLY_PNBUF);
819 vrele(fdp->fd_cdir);
820 fdp->fd_cdir = nd.ni_vp;
821 return (0);
822}
823
824/*
825 * Helper function for raised chroot(2) security function: Refuse if
826 * any filedescriptors are open directories.
827 */
828static int
829chroot_refuse_vdir_fds(fdp)
830 struct filedesc *fdp;
831{
832 struct vnode *vp;
833 struct file *fp;
834 int error;
835 int fd;
836
837 for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
838 error = getvnode(fdp, fd, &fp);
839 if (error)
840 continue;
841 vp = (struct vnode *)fp->f_data;
842 if (vp->v_type != VDIR)
843 continue;
844 return(EPERM);
845 }
846 return (0);
847}
848
849/*
850 * This sysctl determines if we will allow a process to chroot(2) if it
851 * has a directory open:
852 * 0: disallowed for all processes.
853 * 1: allowed for processes that were not already chroot(2)'ed.
854 * 2: allowed for all processes.
855 */
856
857static int chroot_allow_open_directories = 1;
858
859SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
860 &chroot_allow_open_directories, 0, "");
861
862/*
863 * chroot_args(char *path)
864 *
865 * Change notion of root (``/'') directory.
866 */
867/* ARGSUSED */
868int
869chroot(struct chroot_args *uap)
870{
871 struct proc *p = curproc;
872 struct filedesc *fdp = p->p_fd;
873 int error;
874 struct nameidata nd;
875
876 error = suser_xxx(0, PRISON_ROOT);
877 if (error)
878 return (error);
879 if (chroot_allow_open_directories == 0 ||
880 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
881 error = chroot_refuse_vdir_fds(fdp);
882 if (error)
883 return (error);
884 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
885 SCARG(uap, path), p);
886 if ((error = change_dir(&nd, p)) != 0)
887 return (error);
888 NDFREE(&nd, NDF_ONLY_PNBUF);
889 vrele(fdp->fd_rdir);
890 fdp->fd_rdir = nd.ni_vp;
891 if (!fdp->fd_jdir) {
892 fdp->fd_jdir = nd.ni_vp;
893 VREF(fdp->fd_jdir);
894 }
895 return (0);
896}
897
898/*
899 * Common routine for chroot and chdir.
900 */
901static int
902change_dir(struct nameidata *ndp, struct proc *p)
903{
904 struct vnode *vp;
905 int error;
906
907 error = namei(ndp);
908 if (error)
909 return (error);
910 vp = ndp->ni_vp;
911 if (vp->v_type != VDIR)
912 error = ENOTDIR;
913 else
914 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
915 if (error)
916 vput(vp);
917 else
918 VOP_UNLOCK(vp, 0, p);
919 return (error);
920}
921
922/*
923 * open_args(char *path, int flags, int mode)
924 *
925 * Check permissions, allocate an open file structure,
926 * and call the device open routine if any.
 *
 * Returns EINVAL when both access-mode bits are set in flags, the
 * falloc()/vn_open()/dupfdopen()/VOP_ADVLOCK() error on failure
 * (ERESTART from vn_open() is reported as EINTR), and 0 with the new
 * descriptor index in p_retval[0] on success.
927 */
928int
929open(struct open_args *uap)
930{
931 struct proc *p = curproc;
932 struct filedesc *fdp = p->p_fd;
933 struct file *fp;
934 struct vnode *vp;
935 int cmode, flags, oflags;
936 struct file *nfp;
937 int type, indx, error;
938 struct flock lf;
939 struct nameidata nd;
940
941 oflags = SCARG(uap, flags);
942 if ((oflags & O_ACCMODE) == O_ACCMODE)
943 return (EINVAL);
944 flags = FFLAGS(oflags);
/* Reserve the file structure and descriptor slot before the lookup. */
945 error = falloc(p, &nfp, &indx);
946 if (error)
947 return (error);
948 fp = nfp;
/* Creation mode: apply the process cmask, keep ALLPERMS bits, strip S_ISTXT. */
949 cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
950 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
/* Negative marker; the error path below tests for p_dupfd >= 0. */
951 p->p_dupfd = -indx - 1; /* XXX check for fdopen */
952 /*
953 * Bump the ref count to prevent another process from closing
954 * the descriptor while we are blocked in vn_open()
955 */
956 fhold(fp);
957 error = vn_open(&nd, flags, cmode);
958 if (error) {
959 /*
960 * release our own reference
961 */
962 fdrop(fp, p);
963
964 /*
965 * handle special fdopen() case. bleh. dupfdopen() is
966 * responsible for dropping the old contents of ofiles[indx]
967 * if it succeeds.
968 */
969 if ((error == ENODEV || error == ENXIO) &&
970 p->p_dupfd >= 0 && /* XXX from fdopen */
971 (error =
972 dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
973 p->p_retval[0] = indx;
974 return (0);
975 }
976 /*
977 * Clean up the descriptor, but only if another thread hadn't
978 * replaced or closed it.
979 */
980 if (fdp->fd_ofiles[indx] == fp) {
981 fdp->fd_ofiles[indx] = NULL;
982 fdrop(fp, p);
983 }
984
/* ERESTART is reported to the caller as EINTR. */
985 if (error == ERESTART)
986 error = EINTR;
987 return (error);
988 }
989 p->p_dupfd = 0;
990 NDFREE(&nd, NDF_ONLY_PNBUF);
991 vp = nd.ni_vp;
992
993 /*
994 * There should be 2 references on the file, one from the descriptor
995 * table, and one for us.
996 *
997 * Handle the case where someone closed the file (via its file
998 * descriptor) while we were blocked. The end result should look
999 * like opening the file succeeded but it was immediately closed.
1000 */
1001 if (fp->f_count == 1) {
1002 KASSERT(fdp->fd_ofiles[indx] != fp,
1003 ("Open file descriptor lost all refs"));
1004 VOP_UNLOCK(vp, 0, p);
1005 vn_close(vp, flags & FMASK, fp->f_cred, p);
1006 fdrop(fp, p);
1007 p->p_retval[0] = indx;
1008 return 0;
1009 }
1010
/* Attach the vnode to the file; FIFOs get their own file type. */
1011 fp->f_data = (caddr_t)vp;
1012 fp->f_flag = flags & FMASK;
1013 fp->f_ops = &vnops;
1014 fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
/*
 * O_EXLOCK / O_SHLOCK: take a whole-file advisory lock (start 0, len 0),
 * waiting for it unless FNONBLOCK is set.
 */
1015 if (flags & (O_EXLOCK | O_SHLOCK)) {
1016 lf.l_whence = SEEK_SET;
1017 lf.l_start = 0;
1018 lf.l_len = 0;
1019 if (flags & O_EXLOCK)
1020 lf.l_type = F_WRLCK;
1021 else
1022 lf.l_type = F_RDLCK;
1023 type = F_FLOCK;
1024 if ((flags & FNONBLOCK) == 0)
1025 type |= F_WAIT;
/* The vnode lock is dropped across VOP_ADVLOCK() and retaken afterwards. */
1026 VOP_UNLOCK(vp, 0, p);
1027 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
1028 /*
1029 * lock request failed. Normally close the descriptor
1030 * but handle the case where someone might have dup()d
1031 * it when we weren't looking. One reference is
1032 * owned by the descriptor array, the other by us.
1033 */
1034 if (fdp->fd_ofiles[indx] == fp) {
1035 fdp->fd_ofiles[indx] = NULL;
1036 fdrop(fp, p);
1037 }
1038 fdrop(fp, p);
1039 return (error);
1040 }
1041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1042 fp->f_flag |= FHASLOCK;
1043 }
1044 /* assert that vn_open created a backing object if one is needed */
1045 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1046 ("open: vmio vnode has no backing object after vn_open"));
1047 VOP_UNLOCK(vp, 0, p);
1048
1049 /*
1050 * release our private reference, leaving the one associated with the
1051 * descriptor table intact.
1052 */
1053 fdrop(fp, p);
1054 p->p_retval[0] = indx;
1055 return (0);
1056}
1057
#ifdef COMPAT_43
/*
 * ocreat(char *path, int mode)
 *
 * Create a file.
 *
 * Compatibility entry point: builds an open_args with the caller's
 * path and mode and flags fixed at O_WRONLY | O_CREAT | O_TRUNC, then
 * defers to open().
 */
int
ocreat(struct ocreat_args *uap)
{
	struct open_args oargs;

	SCARG(&oargs, path) = SCARG(uap, path);
	SCARG(&oargs, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&oargs, mode) = SCARG(uap, mode);
	return (open(&oargs));
}
#endif /* COMPAT_43 */
1079
1080/*
1081 * mknod_args(char *path, int mode, int dev)
1082 *
1083 * Create a special file.
1084 */
1085/* ARGSUSED */
1086int
1087mknod(struct mknod_args *uap)
1088{
1089 struct proc *p = curproc;
1090 struct vnode *vp;
1091 struct vattr vattr;
1092 int error;
1093 int whiteout = 0;
1094 struct nameidata nd;
1095
1096 switch (SCARG(uap, mode) & S_IFMT) {
1097 case S_IFCHR:
1098 case S_IFBLK:
1099 error = suser();
1100 break;
1101 default:
1102 error = suser_xxx(0, PRISON_ROOT);
1103 break;
1104 }
1105 if (error)
1106 return (error);
1107 bwillwrite();
1108 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1109 if ((error = namei(&nd)) != 0)
1110 return (error);
1111 vp = nd.ni_vp;
1112 if (vp != NULL)
1113 error = EEXIST;
1114 else {
1115 VATTR_NULL(&vattr);
1116 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1117 vattr.va_rdev = SCARG(uap, dev);
1118 whiteout = 0;
1119
1120 switch (SCARG(uap, mode) & S_IFMT) {
1121 case S_IFMT: /* used by badsect to flag bad sectors */
1122 vattr.va_type = VBAD;
1123 break;
1124 case S_IFCHR:
1125 vattr.va_type = VCHR;
1126 break;
1127 case S_IFBLK:
1128 vattr.va_type = VBLK;
1129 break;
1130 case S_IFWHT:
1131 whiteout = 1;
1132 break;
1133 default:
1134 error = EINVAL;
1135 break;
1136 }
1137 }
1138 if (!error) {
1139 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1140 if (whiteout)
1141 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1142 else {
1143 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1144 &nd.ni_cnd, &vattr);
1145 if (error == 0)
1146 vput(nd.ni_vp);
1147 }
1148 NDFREE(&nd, NDF_ONLY_PNBUF);
1149 vput(nd.ni_dvp);
1150 } else {
1151 NDFREE(&nd, NDF_ONLY_PNBUF);
1152 if (nd.ni_dvp == vp)
1153 vrele(nd.ni_dvp);
1154 else
1155 vput(nd.ni_dvp);
1156 if (vp)
1157 vrele(vp);
1158 }
1159 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1160 ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1161 return (error);
1162}
1163
1164/*
1165 * mkfifo_args(char *path, int mode)
1166 *
1167 * Create a named pipe.
1168 */
1169/* ARGSUSED */
1170int
1171mkfifo(struct mkfifo_args *uap)
1172{
1173 struct proc *p = curproc;
1174 struct vattr vattr;
1175 int error;
1176 struct nameidata nd;
1177
1178 bwillwrite();
1179 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1180 if ((error = namei(&nd)) != 0)
1181 return (error);
1182 if (nd.ni_vp != NULL) {
1183 NDFREE(&nd, NDF_ONLY_PNBUF);
1184 if (nd.ni_dvp == nd.ni_vp)
1185 vrele(nd.ni_dvp);
1186 else
1187 vput(nd.ni_dvp);
1188 vrele(nd.ni_vp);
1189 return (EEXIST);
1190 }
1191 VATTR_NULL(&vattr);
1192 vattr.va_type = VFIFO;
1193 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1194 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1195 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1196 if (error == 0)
1197 vput(nd.ni_vp);
1198 NDFREE(&nd, NDF_ONLY_PNBUF);
1199 vput(nd.ni_dvp);
1200 return (error);
1201}
1202
1203/*
1204 * link_args(char *path, char *link)
1205 *
1206 * Make a hard file link.
1207 */
1208/* ARGSUSED */
1209int
1210link(struct link_args *uap)
1211{
1212 struct proc *p = curproc;
1213 struct vnode *vp;
1214 struct nameidata nd;
1215 int error;
1216
1217 bwillwrite();
1218 NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1219 if ((error = namei(&nd)) != 0)
1220 return (error);
1221 NDFREE(&nd, NDF_ONLY_PNBUF);
1222 vp = nd.ni_vp;
1223 if (vp->v_type == VDIR)
1224 error = EPERM; /* POSIX */
1225 else {
1226 NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1227 error = namei(&nd);
1228 if (!error) {
1229 if (nd.ni_vp != NULL) {
1230 if (nd.ni_vp)
1231 vrele(nd.ni_vp);
1232 error = EEXIST;
1233 } else {
1234 VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
1235 LEASE_WRITE);
1236 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1237 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1238 }
1239 NDFREE(&nd, NDF_ONLY_PNBUF);
1240 if (nd.ni_dvp == nd.ni_vp)
1241 vrele(nd.ni_dvp);
1242 else
1243 vput(nd.ni_dvp);
1244 }
1245 }
1246 vrele(vp);
1247 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1248 ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1249 return (error);
1250}
1251
1252/*
1253 * symlink(char *path, char *link)
1254 *
1255 * Make a symbolic link.
1256 */
1257/* ARGSUSED */
1258int
1259symlink(struct symlink_args *uap)
1260{
1261 struct proc *p = curproc;
1262 struct vattr vattr;
1263 char *path;
1264 int error;
1265 struct nameidata nd;
1266
1267 path = zalloc(namei_zone);
1268 if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1269 goto out;
1270 bwillwrite();
1271 NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1272 if ((error = namei(&nd)) != 0)
1273 goto out;
1274 if (nd.ni_vp) {
1275 NDFREE(&nd, NDF_ONLY_PNBUF);
1276 if (nd.ni_dvp == nd.ni_vp)
1277 vrele(nd.ni_dvp);
1278 else
1279 vput(nd.ni_dvp);
1280 vrele(nd.ni_vp);
1281 error = EEXIST;
1282 goto out;
1283 }
1284 VATTR_NULL(&vattr);
1285 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1286 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1287 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1288 NDFREE(&nd, NDF_ONLY_PNBUF);
1289 if (error == 0)
1290 vput(nd.ni_vp);
1291 vput(nd.ni_dvp);
1292 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1293 ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1294out:
1295 zfree(namei_zone, path);
1296 return (error);
1297}
1298
1299/*
1300 * undelete_args(char *path)
1301 *
1302 * Delete a whiteout from the filesystem.
1303 */
1304/* ARGSUSED */
1305int
1306undelete(struct undelete_args *uap)
1307{
1308 struct proc *p = curproc;
1309 int error;
1310 struct nameidata nd;
1311
1312 bwillwrite();
1313 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1314 SCARG(uap, path), p);
1315 error = namei(&nd);
1316 if (error)
1317 return (error);
1318
1319 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1320 NDFREE(&nd, NDF_ONLY_PNBUF);
1321 if (nd.ni_dvp == nd.ni_vp)
1322 vrele(nd.ni_dvp);
1323 else
1324 vput(nd.ni_dvp);
1325 if (nd.ni_vp)
1326 vrele(nd.ni_vp);
1327 return (EEXIST);
1328 }
1329
1330 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1331 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1332 NDFREE(&nd, NDF_ONLY_PNBUF);
1333 vput(nd.ni_dvp);
1334 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1335 ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1336 return (error);
1337}
1338
1339/*
1340 * unlink_args(char *path)
1341 *
1342 * Delete a name from the filesystem.
1343 */
1344int
1345unlink(struct unlink_args *uap)
1346{
1347 struct proc *p = curproc;
1348 struct vnode *vp;
1349 int error;
1350 struct nameidata nd;
1351
1352 bwillwrite();
1353 NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1354 if ((error = namei(&nd)) != 0)
1355 return (error);
1356 vp = nd.ni_vp;
1357 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1358 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1359
1360 if (vp->v_type == VDIR)
1361 error = EPERM; /* POSIX */
1362 else {
1363 /*
1364 * The root of a mounted filesystem cannot be deleted.
1365 *
1366 * XXX: can this only be a VDIR case?
1367 */
1368 if (vp->v_flag & VROOT)
1369 error = EBUSY;
1370 }
1371
1372 if (!error) {
1373 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1374 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1375 }
1376 NDFREE(&nd, NDF_ONLY_PNBUF);
1377 if (nd.ni_dvp == vp)
1378 vrele(nd.ni_dvp);
1379 else
1380 vput(nd.ni_dvp);
1381 if (vp != NULLVP)
1382 vput(vp);
1383 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1384 ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1385 return (error);
1386}
1387
1388/*
1389 * lseek_args(int fd, int pad, off_t offset, int whence)
1390 *
1391 * Reposition read/write file offset.
1392 */
1393int
1394lseek(struct lseek_args *uap)
1395{
1396 struct proc *p = curproc;
1397 struct ucred *cred = p->p_ucred;
1398 struct filedesc *fdp = p->p_fd;
1399 struct file *fp;
1400 struct vattr vattr;
1401 int error;
1402
1403 if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1404 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1405 return (EBADF);
1406 if (fp->f_type != DTYPE_VNODE)
1407 return (ESPIPE);
1408 switch (SCARG(uap, whence)) {
1409 case L_INCR:
1410 fp->f_offset += SCARG(uap, offset);
1411 break;
1412 case L_XTND:
1413 error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1414 if (error)
1415 return (error);
1416 fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1417 break;
1418 case L_SET:
1419 fp->f_offset = SCARG(uap, offset);
1420 break;
1421 default:
1422 return (EINVAL);
1423 }
1424 *(off_t *)(p->p_retval) = fp->f_offset;
1425 return (0);
1426}
1427
1428#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1429/*
1430 * Reposition read/write file offset.
1431 *
1432 * olseek_args(int fd, long offset, int whence)
1433 */
1434int
1435olseek(struct olseek_args *uap)
1436{
1437 struct lseek_args /* {
1438 syscallarg(int) fd;
1439 syscallarg(int) pad;
1440 syscallarg(off_t) offset;
1441 syscallarg(int) whence;
1442 } */ nuap;
1443 int error;
1444
1445 SCARG(&nuap, fd) = SCARG(uap, fd);
1446 SCARG(&nuap, offset) = SCARG(uap, offset);
1447 SCARG(&nuap, whence) = SCARG(uap, whence);
1448 error = lseek(&nuap);
1449 return (error);
1450}
1451#endif /* COMPAT_43 */
1452
1453/*
1454 * access_args(char *path, int flags)
1455 *
1456 * Check access permissions.
1457 */
1458int
1459access(struct access_args *uap)
1460{
1461 struct proc *p = curproc;
1462 struct ucred *cred, *tmpcred;
1463 struct vnode *vp;
1464 int error, flags;
1465 struct nameidata nd;
1466
1467 cred = p->p_ucred;
1468 /*
1469 * Create and modify a temporary credential instead of one that
1470 * is potentially shared. This could also mess up socket
1471 * buffer accounting which can run in an interrupt context.
1472 */
1473 tmpcred = crdup(cred);
1474 tmpcred->cr_uid = p->p_ucred->cr_ruid;
1475 tmpcred->cr_groups[0] = p->p_ucred->cr_rgid;
1476 p->p_ucred = tmpcred;
1477 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1478 SCARG(uap, path), p);
1479 if ((error = namei(&nd)) != 0)
1480 goto out1;
1481 vp = nd.ni_vp;
1482
1483 /* Flags == 0 means only check for existence. */
1484 if (SCARG(uap, flags)) {
1485 flags = 0;
1486 if (SCARG(uap, flags) & R_OK)
1487 flags |= VREAD;
1488 if (SCARG(uap, flags) & W_OK)
1489 flags |= VWRITE;
1490 if (SCARG(uap, flags) & X_OK)
1491 flags |= VEXEC;
1492 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1493 error = VOP_ACCESS(vp, flags, tmpcred, p);
1494 }
1495 NDFREE(&nd, NDF_ONLY_PNBUF);
1496 vput(vp);
1497out1:
1498 p->p_ucred = cred;
1499 crfree(tmpcred);
1500 return (error);
1501}
1502
1503#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1504/*
1505 * ostat_args(char *path, struct ostat *ub)
1506 *
1507 * Get file status; this version follows links.
1508 */
1509/* ARGSUSED */
1510int
1511ostat(struct ostat_args *uap)
1512{
1513 struct proc *p = curproc;
1514 struct stat sb;
1515 struct ostat osb;
1516 int error;
1517 struct nameidata nd;
1518
1519 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1520 SCARG(uap, path), p);
1521 if ((error = namei(&nd)) != 0)
1522 return (error);
1523 NDFREE(&nd, NDF_ONLY_PNBUF);
1524 error = vn_stat(nd.ni_vp, &sb, p);
1525 vput(nd.ni_vp);
1526 if (error)
1527 return (error);
1528 cvtstat(&sb, &osb);
1529 error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1530 return (error);
1531}
1532
1533/*
1534 * olstat_args(char *path, struct ostat *ub)
1535 *
1536 * Get file status; this version does not follow links.
1537 */
1538/* ARGSUSED */
1539int
1540olstat(struct olstat_args *uap)
1541{
1542 struct proc *p = curproc;
1543 struct vnode *vp;
1544 struct stat sb;
1545 struct ostat osb;
1546 int error;
1547 struct nameidata nd;
1548
1549 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1550 SCARG(uap, path), p);
1551 if ((error = namei(&nd)) != 0)
1552 return (error);
1553 vp = nd.ni_vp;
1554 error = vn_stat(vp, &sb, p);
1555 NDFREE(&nd, NDF_ONLY_PNBUF);
1556 vput(vp);
1557 if (error)
1558 return (error);
1559 cvtstat(&sb, &osb);
1560 error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1561 return (error);
1562}
1563
1564/*
1565 * Convert from an old to a new stat structure.
1566 */
1567void
1568cvtstat(st, ost)
1569 struct stat *st;
1570 struct ostat *ost;
1571{
1572 ost->st_dev = st->st_dev;
1573 ost->st_ino = st->st_ino;
1574 ost->st_mode = st->st_mode;
1575 ost->st_nlink = st->st_nlink;
1576 ost->st_uid = st->st_uid;
1577 ost->st_gid = st->st_gid;
1578 ost->st_rdev = st->st_rdev;
1579 if (st->st_size < (quad_t)1 << 32)
1580 ost->st_size = st->st_size;
1581 else
1582 ost->st_size = -2;
1583 ost->st_atime = st->st_atime;
1584 ost->st_mtime = st->st_mtime;
1585 ost->st_ctime = st->st_ctime;
1586 ost->st_blksize = st->st_blksize;
1587 ost->st_blocks = st->st_blocks;
1588 ost->st_flags = st->st_flags;
1589 ost->st_gen = st->st_gen;
1590}
1591#endif /* COMPAT_43 || COMPAT_SUNOS */
1592
1593/*
1594 * stat_args(char *path, struct stat *ub)
1595 *
1596 * Get file status; this version follows links.
1597 */
1598/* ARGSUSED */
1599int
1600stat(struct stat_args *uap)
1601{
1602 struct proc *p = curproc;
1603 struct stat sb;
1604 int error;
1605 struct nameidata nd;
1606
1607 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1608 SCARG(uap, path), p);
1609 if ((error = namei(&nd)) != 0)
1610 return (error);
1611 error = vn_stat(nd.ni_vp, &sb, p);
1612 NDFREE(&nd, NDF_ONLY_PNBUF);
1613 vput(nd.ni_vp);
1614 if (error)
1615 return (error);
1616 error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1617 return (error);
1618}
1619
1620/*
1621 * lstat_args(char *path, struct stat *ub)
1622 *
1623 * Get file status; this version does not follow links.
1624 */
1625/* ARGSUSED */
1626int
1627lstat(struct lstat_args *uap)
1628{
1629 struct proc *p = curproc;
1630 int error;
1631 struct vnode *vp;
1632 struct stat sb;
1633 struct nameidata nd;
1634
1635 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1636 SCARG(uap, path), p);
1637 if ((error = namei(&nd)) != 0)
1638 return (error);
1639 vp = nd.ni_vp;
1640 error = vn_stat(vp, &sb, p);
1641 NDFREE(&nd, NDF_ONLY_PNBUF);
1642 vput(vp);
1643 if (error)
1644 return (error);
1645 error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1646 return (error);
1647}
1648
1649void
1650cvtnstat(sb, nsb)
1651 struct stat *sb;
1652 struct nstat *nsb;
1653{
1654 nsb->st_dev = sb->st_dev;
1655 nsb->st_ino = sb->st_ino;
1656 nsb->st_mode = sb->st_mode;
1657 nsb->st_nlink = sb->st_nlink;
1658 nsb->st_uid = sb->st_uid;
1659 nsb->st_gid = sb->st_gid;
1660 nsb->st_rdev = sb->st_rdev;
1661 nsb->st_atimespec = sb->st_atimespec;
1662 nsb->st_mtimespec = sb->st_mtimespec;
1663 nsb->st_ctimespec = sb->st_ctimespec;
1664 nsb->st_size = sb->st_size;
1665 nsb->st_blocks = sb->st_blocks;
1666 nsb->st_blksize = sb->st_blksize;
1667 nsb->st_flags = sb->st_flags;
1668 nsb->st_gen = sb->st_gen;
1669 nsb->st_qspare[0] = sb->st_qspare[0];
1670 nsb->st_qspare[1] = sb->st_qspare[1];
1671}
1672
1673/*
1674 * nstat_args(char *path, struct nstat *ub)
1675 */
1676/* ARGSUSED */
1677int
1678nstat(struct nstat_args *uap)
1679{
1680 struct proc *p = curproc;
1681 struct stat sb;
1682 struct nstat nsb;
1683 int error;
1684 struct nameidata nd;
1685
1686 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1687 SCARG(uap, path), p);
1688 if ((error = namei(&nd)) != 0)
1689 return (error);
1690 NDFREE(&nd, NDF_ONLY_PNBUF);
1691 error = vn_stat(nd.ni_vp, &sb, p);
1692 vput(nd.ni_vp);
1693 if (error)
1694 return (error);
1695 cvtnstat(&sb, &nsb);
1696 error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1697 return (error);
1698}
1699
1700/*
1701 * lstat_args(char *path, struct stat *ub)
1702 *
1703 * Get file status; this version does not follow links.
1704 */
1705/* ARGSUSED */
1706int
1707nlstat(struct nlstat_args *uap)
1708{
1709 struct proc *p = curproc;
1710 int error;
1711 struct vnode *vp;
1712 struct stat sb;
1713 struct nstat nsb;
1714 struct nameidata nd;
1715
1716 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717 SCARG(uap, path), p);
1718 if ((error = namei(&nd)) != 0)
1719 return (error);
1720 vp = nd.ni_vp;
1721 NDFREE(&nd, NDF_ONLY_PNBUF);
1722 error = vn_stat(vp, &sb, p);
1723 vput(vp);
1724 if (error)
1725 return (error);
1726 cvtnstat(&sb, &nsb);
1727 error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1728 return (error);
1729}
1730
1731/*
1732 * pathconf_Args(char *path, int name)
1733 *
1734 * Get configurable pathname variables.
1735 */
1736/* ARGSUSED */
1737int
1738pathconf(struct pathconf_args *uap)
1739{
1740 struct proc *p = curproc;
1741 int error;
1742 struct nameidata nd;
1743
1744 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1745 SCARG(uap, path), p);
1746 if ((error = namei(&nd)) != 0)
1747 return (error);
1748 NDFREE(&nd, NDF_ONLY_PNBUF);
1749 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
1750 vput(nd.ni_vp);
1751 return (error);
1752}
1753
1754/*
1755 * readlink_args(char *path, char *buf, int count)
1756 *
1757 * Return target name of a symbolic link.
1758 */
1759/* ARGSUSED */
1760int
1761readlink(struct readlink_args *uap)
1762{
1763 struct proc *p = curproc;
1764 struct vnode *vp;
1765 struct iovec aiov;
1766 struct uio auio;
1767 int error;
1768 struct nameidata nd;
1769
1770 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1771 SCARG(uap, path), p);
1772 if ((error = namei(&nd)) != 0)
1773 return (error);
1774 NDFREE(&nd, NDF_ONLY_PNBUF);
1775 vp = nd.ni_vp;
1776 if (vp->v_type != VLNK)
1777 error = EINVAL;
1778 else {
1779 aiov.iov_base = SCARG(uap, buf);
1780 aiov.iov_len = SCARG(uap, count);
1781 auio.uio_iov = &aiov;
1782 auio.uio_iovcnt = 1;
1783 auio.uio_offset = 0;
1784 auio.uio_rw = UIO_READ;
1785 auio.uio_segflg = UIO_USERSPACE;
1786 auio.uio_procp = p;
1787 auio.uio_resid = SCARG(uap, count);
1788 error = VOP_READLINK(vp, &auio, p->p_ucred);
1789 }
1790 vput(vp);
1791 p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
1792 return (error);
1793}
1794
1795static int
1796setfflags(struct vnode *vp, int flags)
1797{
1798 struct proc *p = curproc;
1799 int error;
1800 struct vattr vattr;
1801
1802 /*
1803 * Prevent non-root users from setting flags on devices. When
1804 * a device is reused, users can retain ownership of the device
1805 * if they are allowed to set flags and programs assume that
1806 * chown can't fail when done as root.
1807 */
1808 if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
1809 ((error = suser_xxx(p->p_ucred, PRISON_ROOT)) != 0))
1810 return (error);
1811
1812 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1813 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1814 VATTR_NULL(&vattr);
1815 vattr.va_flags = flags;
1816 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1817 VOP_UNLOCK(vp, 0, p);
1818 return (error);
1819}
1820
1821/*
1822 * chflags(char *path, int flags)
1823 *
1824 * Change flags of a file given a path name.
1825 */
1826/* ARGSUSED */
1827int
1828chflags(struct chflags_args *uap)
1829{
1830 struct proc *p = curproc;
1831 int error;
1832 struct nameidata nd;
1833
1834 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1835 if ((error = namei(&nd)) != 0)
1836 return (error);
1837 NDFREE(&nd, NDF_ONLY_PNBUF);
1838 error = setfflags(nd.ni_vp, SCARG(uap, flags));
1839 vrele(nd.ni_vp);
1840 return error;
1841}
1842
1843/*
1844 * fchflags_args(int fd, int flags)
1845 *
1846 * Change flags of a file given a file descriptor.
1847 */
1848/* ARGSUSED */
1849int
1850fchflags(struct fchflags_args *uap)
1851{
1852 struct proc *p = curproc;
1853 struct file *fp;
1854 int error;
1855
1856 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1857 return (error);
1858 return setfflags((struct vnode *) fp->f_data, SCARG(uap, flags));
1859}
1860
1861static int
1862setfmode(struct vnode *vp, int mode)
1863{
1864 struct proc *p = curproc;
1865 int error;
1866 struct vattr vattr;
1867
1868 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1869 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1870 VATTR_NULL(&vattr);
1871 vattr.va_mode = mode & ALLPERMS;
1872 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1873 VOP_UNLOCK(vp, 0, p);
1874 return error;
1875}
1876
1877/*
1878 * chmod_args(char *path, int mode)
1879 *
1880 * Change mode of a file given path name.
1881 */
1882/* ARGSUSED */
1883int
1884chmod(struct chmod_args *uap)
1885{
1886 struct proc *p = curproc;
1887 int error;
1888 struct nameidata nd;
1889
1890 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1891 if ((error = namei(&nd)) != 0)
1892 return (error);
1893 NDFREE(&nd, NDF_ONLY_PNBUF);
1894 error = setfmode(nd.ni_vp, SCARG(uap, mode));
1895 vrele(nd.ni_vp);
1896 return error;
1897}
1898
1899/*
1900 * lchmod_args(char *path, int mode)
1901 *
1902 * Change mode of a file given path name (don't follow links.)
1903 */
1904/* ARGSUSED */
1905int
1906lchmod(struct lchmod_args *uap)
1907{
1908 struct proc *p = curproc;
1909 int error;
1910 struct nameidata nd;
1911
1912 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1913 if ((error = namei(&nd)) != 0)
1914 return (error);
1915 NDFREE(&nd, NDF_ONLY_PNBUF);
1916 error = setfmode(nd.ni_vp, SCARG(uap, mode));
1917 vrele(nd.ni_vp);
1918 return error;
1919}
1920
1921/*
1922 * fchmod_args(int fd, int mode)
1923 *
1924 * Change mode of a file given a file descriptor.
1925 */
1926/* ARGSUSED */
1927int
1928fchmod(struct fchmod_args *uap)
1929{
1930 struct proc *p = curproc;
1931 struct file *fp;
1932 int error;
1933
1934 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1935 return (error);
1936 return setfmode((struct vnode *)fp->f_data, SCARG(uap, mode));
1937}
1938
1939static int
1940setfown(struct vnode *vp, uid_t uid, gid_t gid)
1941{
1942 struct proc *p = curproc;
1943 int error;
1944 struct vattr vattr;
1945
1946 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1947 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1948 VATTR_NULL(&vattr);
1949 vattr.va_uid = uid;
1950 vattr.va_gid = gid;
1951 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1952 VOP_UNLOCK(vp, 0, p);
1953 return error;
1954}
1955
1956/*
1957 * chown(char *path, int uid, int gid)
1958 *
1959 * Set ownership given a path name.
1960 */
1961/* ARGSUSED */
1962int
1963chown(struct chown_args *uap)
1964{
1965 struct proc *p = curproc;
1966 int error;
1967 struct nameidata nd;
1968
1969 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1970 if ((error = namei(&nd)) != 0)
1971 return (error);
1972 NDFREE(&nd, NDF_ONLY_PNBUF);
1973 error = setfown(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
1974 vrele(nd.ni_vp);
1975 return (error);
1976}
1977
1978/*
1979 * lchown_args(char *path, int uid, int gid)
1980 *
1981 * Set ownership given a path name, do not cross symlinks.
1982 */
1983/* ARGSUSED */
1984int
1985lchown(struct lchown_args *uap)
1986{
1987 struct proc *p = curproc;
1988 int error;
1989 struct nameidata nd;
1990
1991 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1992 if ((error = namei(&nd)) != 0)
1993 return (error);
1994 NDFREE(&nd, NDF_ONLY_PNBUF);
1995 error = setfown(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
1996 vrele(nd.ni_vp);
1997 return (error);
1998}
1999
2000/*
2001 * fchown_args(int fd, int uid, int gid)
2002 *
2003 * Set ownership given a file descriptor.
2004 */
2005/* ARGSUSED */
2006int
2007fchown(struct fchown_args *uap)
2008{
2009 struct proc *p = curproc;
2010 struct file *fp;
2011 int error;
2012
2013 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2014 return (error);
2015 return setfown((struct vnode *)fp->f_data,
2016 SCARG(uap, uid), SCARG(uap, gid));
2017}
2018
2019static int
2020getutimes(const struct timeval *usrtvp, struct timespec *tsp)
2021{
2022 struct timeval tv[2];
2023 int error;
2024
2025 if (usrtvp == NULL) {
2026 microtime(&tv[0]);
2027 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2028 tsp[1] = tsp[0];
2029 } else {
2030 if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2031 return (error);
2032 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2033 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2034 }
2035 return 0;
2036}
2037
2038static int
2039setutimes(struct vnode *vp, const struct timespec *ts, int nullflag)
2040{
2041 struct proc *p = curproc;
2042 int error;
2043 struct vattr vattr;
2044
2045 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2046 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2047 VATTR_NULL(&vattr);
2048 vattr.va_atime = ts[0];
2049 vattr.va_mtime = ts[1];
2050 if (nullflag)
2051 vattr.va_vaflags |= VA_UTIMES_NULL;
2052 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2053 VOP_UNLOCK(vp, 0, p);
2054 return error;
2055}
2056
2057/*
2058 * utimes_args(char *path, struct timeval *tptr)
2059 *
2060 * Set the access and modification times of a file.
2061 */
2062/* ARGSUSED */
2063int
2064utimes(struct utimes_args *uap)
2065{
2066 struct proc *p = curproc;
2067 struct timespec ts[2];
2068 struct timeval *usrtvp;
2069 int error;
2070 struct nameidata nd;
2071
2072 usrtvp = SCARG(uap, tptr);
2073 if ((error = getutimes(usrtvp, ts)) != 0)
2074 return (error);
2075 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2076 if ((error = namei(&nd)) != 0)
2077 return (error);
2078 NDFREE(&nd, NDF_ONLY_PNBUF);
2079 error = setutimes(nd.ni_vp, ts, usrtvp == NULL);
2080 vrele(nd.ni_vp);
2081 return (error);
2082}
2083
2084/*
2085 * lutimes_args(char *path, struct timeval *tptr)
2086 *
2087 * Set the access and modification times of a file.
2088 */
2089/* ARGSUSED */
2090int
2091lutimes(struct lutimes_args *uap)
2092{
2093 struct proc *p = curproc;
2094 struct timespec ts[2];
2095 struct timeval *usrtvp;
2096 int error;
2097 struct nameidata nd;
2098
2099 usrtvp = SCARG(uap, tptr);
2100 if ((error = getutimes(usrtvp, ts)) != 0)
2101 return (error);
2102 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2103 if ((error = namei(&nd)) != 0)
2104 return (error);
2105 NDFREE(&nd, NDF_ONLY_PNBUF);
2106 error = setutimes(nd.ni_vp, ts, usrtvp == NULL);
2107 vrele(nd.ni_vp);
2108 return (error);
2109}
2110
2111/*
2112 * futimes_args(int fd, struct timeval *tptr)
2113 *
2114 * Set the access and modification times of a file.
2115 */
2116/* ARGSUSED */
2117int
2118futimes(struct futimes_args *uap)
2119{
2120 struct proc *p = curproc;
2121 struct timespec ts[2];
2122 struct file *fp;
2123 struct timeval *usrtvp;
2124 int error;
2125
2126 usrtvp = SCARG(uap, tptr);
2127 if ((error = getutimes(usrtvp, ts)) != 0)
2128 return (error);
2129 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2130 return (error);
2131 return setutimes((struct vnode *)fp->f_data, ts, usrtvp == NULL);
2132}
2133
2134/*
2135 * truncate(char *path, int pad, off_t length)
2136 *
2137 * Truncate a file given its path name.
2138 */
2139/* ARGSUSED */
2140int
2141truncate(struct truncate_args *uap)
2142{
2143 struct proc *p = curproc;
2144 struct vnode *vp;
2145 struct vattr vattr;
2146 int error;
2147 struct nameidata nd;
2148
2149 if (uap->length < 0)
2150 return(EINVAL);
2151 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2152 if ((error = namei(&nd)) != 0)
2153 return (error);
2154 vp = nd.ni_vp;
2155 NDFREE(&nd, NDF_ONLY_PNBUF);
2156 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2157 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2158 if (vp->v_type == VDIR)
2159 error = EISDIR;
2160 else if ((error = vn_writechk(vp)) == 0 &&
2161 (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2162 VATTR_NULL(&vattr);
2163 vattr.va_size = SCARG(uap, length);
2164 error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2165 }
2166 vput(vp);
2167 return (error);
2168}
2169
2170/*
2171 * ftruncate_args(int fd, int pad, off_t length)
2172 *
2173 * Truncate a file given a file descriptor.
2174 */
2175/* ARGSUSED */
2176int
2177ftruncate(struct ftruncate_args *uap)
2178{
2179 struct proc *p = curproc;
2180 struct vattr vattr;
2181 struct vnode *vp;
2182 struct file *fp;
2183 int error;
2184
2185 if (uap->length < 0)
2186 return(EINVAL);
2187 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2188 return (error);
2189 if ((fp->f_flag & FWRITE) == 0)
2190 return (EINVAL);
2191 vp = (struct vnode *)fp->f_data;
2192 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2194 if (vp->v_type == VDIR)
2195 error = EISDIR;
2196 else if ((error = vn_writechk(vp)) == 0) {
2197 VATTR_NULL(&vattr);
2198 vattr.va_size = SCARG(uap, length);
2199 error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2200 }
2201 VOP_UNLOCK(vp, 0, p);
2202 return (error);
2203}
2204
2205#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2206/*
2207 * otruncate_args(char *path, long length)
2208 *
2209 * Truncate a file given its path name.
2210 */
2211/* ARGSUSED */
2212int
2213otruncate(struct otruncate_args *uap)
2214{
2215 struct truncate_args /* {
2216 syscallarg(char *) path;
2217 syscallarg(int) pad;
2218 syscallarg(off_t) length;
2219 } */ nuap;
2220
2221 SCARG(&nuap, path) = SCARG(uap, path);
2222 SCARG(&nuap, length) = SCARG(uap, length);
2223 return (truncate(&nuap));
2224}
2225
2226/*
2227 * oftruncate_args(int fd, long length)
2228 *
2229 * Truncate a file given a file descriptor.
2230 */
2231/* ARGSUSED */
2232int
2233oftruncate(struct oftruncate_args *uap)
2234{
2235 struct ftruncate_args /* {
2236 syscallarg(int) fd;
2237 syscallarg(int) pad;
2238 syscallarg(off_t) length;
2239 } */ nuap;
2240
2241 SCARG(&nuap, fd) = SCARG(uap, fd);
2242 SCARG(&nuap, length) = SCARG(uap, length);
2243 return (ftruncate(&nuap));
2244}
2245#endif /* COMPAT_43 || COMPAT_SUNOS */
2246
2247/*
2248 * fsync(int fd)
2249 *
2250 * Sync an open file.
2251 */
2252/* ARGSUSED */
2253int
2254fsync(struct fsync_args *uap)
2255{
2256 struct proc *p = curproc;
2257 struct vnode *vp;
2258 struct file *fp;
2259 vm_object_t obj;
2260 int error;
2261
2262 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2263 return (error);
2264 vp = (struct vnode *)fp->f_data;
2265 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2266 if (VOP_GETVOBJECT(vp, &obj) == 0)
2267 vm_object_page_clean(obj, 0, 0, 0);
2268 if ((error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p)) == 0 &&
2269 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
2270 bioops.io_fsync)
2271 error = (*bioops.io_fsync)(vp);
2272 VOP_UNLOCK(vp, 0, p);
2273 return (error);
2274}
2275
2276/*
2277 * rename_args(char *from, char *to)
2278 *
2279 * Rename files. Source and destination must either both be directories,
2280 * or both not be directories. If target is a directory, it must be empty.
2281 */
2282/* ARGSUSED */
2283int
2284rename(struct rename_args *uap)
2285{
2286 struct proc *p = curproc;
2287 struct vnode *tvp, *fvp, *tdvp;
2288 struct nameidata fromnd, tond;
2289 int error;
2290
2291 bwillwrite();
2292 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2293 SCARG(uap, from), p);
2294 if ((error = namei(&fromnd)) != 0)
2295 return (error);
2296 fvp = fromnd.ni_vp;
2297 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2298 UIO_USERSPACE, SCARG(uap, to), p);
2299 if (fromnd.ni_vp->v_type == VDIR)
2300 tond.ni_cnd.cn_flags |= WILLBEDIR;
2301 if ((error = namei(&tond)) != 0) {
2302 /* Translate error code for rename("dir1", "dir2/."). */
2303 if (error == EISDIR && fvp->v_type == VDIR)
2304 error = EINVAL;
2305 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2306 vrele(fromnd.ni_dvp);
2307 vrele(fvp);
2308 goto out1;
2309 }
2310 tdvp = tond.ni_dvp;
2311 tvp = tond.ni_vp;
2312 if (tvp != NULL) {
2313 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2314 error = ENOTDIR;
2315 goto out;
2316 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2317 error = EISDIR;
2318 goto out;
2319 }
2320 }
2321 if (fvp == tdvp)
2322 error = EINVAL;
2323 /*
2324 * If the source is the same as the destination (that is, if they
2325 * are links to the same vnode), then there is nothing to do.
2326 */
2327 if (fvp == tvp)
2328 error = -1;
2329out:
2330 if (!error) {
2331 VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2332 if (fromnd.ni_dvp != tdvp) {
2333 VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2334 }
2335 if (tvp) {
2336 VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
2337 }
2338 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2339 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2340 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2341 NDFREE(&tond, NDF_ONLY_PNBUF);
2342 } else {
2343 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2344 NDFREE(&tond, NDF_ONLY_PNBUF);
2345 if (tdvp == tvp)
2346 vrele(tdvp);
2347 else
2348 vput(tdvp);
2349 if (tvp)
2350 vput(tvp);
2351 vrele(fromnd.ni_dvp);
2352 vrele(fvp);
2353 }
2354 vrele(tond.ni_startdir);
2355 ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2356 ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2357 ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2358 ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2359out1:
2360 if (fromnd.ni_startdir)
2361 vrele(fromnd.ni_startdir);
2362 if (error == -1)
2363 return (0);
2364 return (error);
2365}
2366
2367/*
2368 * mkdir_args(char *path, int mode)
2369 *
2370 * Make a directory file.
2371 */
2372/* ARGSUSED */
2373int
2374mkdir(struct mkdir_args *uap)
2375{
2376 struct proc *p = curproc;
2377 struct vnode *vp;
2378 struct vattr vattr;
2379 int error;
2380 struct nameidata nd;
2381
2382 bwillwrite();
2383 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2384 nd.ni_cnd.cn_flags |= WILLBEDIR;
2385 if ((error = namei(&nd)) != 0)
2386 return (error);
2387 vp = nd.ni_vp;
2388 if (vp != NULL) {
2389 NDFREE(&nd, NDF_ONLY_PNBUF);
2390 if (nd.ni_dvp == vp)
2391 vrele(nd.ni_dvp);
2392 else
2393 vput(nd.ni_dvp);
2394 vrele(vp);
2395 return (EEXIST);
2396 }
2397 VATTR_NULL(&vattr);
2398 vattr.va_type = VDIR;
2399 vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2400 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2401 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2402 NDFREE(&nd, NDF_ONLY_PNBUF);
2403 vput(nd.ni_dvp);
2404 if (!error)
2405 vput(nd.ni_vp);
2406 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2407 ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2408 return (error);
2409}
2410
2411/*
2412 * rmdir_args(char *path)
2413 *
2414 * Remove a directory file.
2415 */
2416/* ARGSUSED */
2417int
2418rmdir(struct rmdir_args *uap)
2419{
2420 struct proc *p = curproc;
2421 struct vnode *vp;
2422 int error;
2423 struct nameidata nd;
2424
2425 bwillwrite();
2426 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2427 SCARG(uap, path), p);
2428 if ((error = namei(&nd)) != 0)
2429 return (error);
2430 vp = nd.ni_vp;
2431 if (vp->v_type != VDIR) {
2432 error = ENOTDIR;
2433 goto out;
2434 }
2435 /*
2436 * No rmdir "." please.
2437 */
2438 if (nd.ni_dvp == vp) {
2439 error = EINVAL;
2440 goto out;
2441 }
2442 /*
2443 * The root of a mounted filesystem cannot be deleted.
2444 */
2445 if (vp->v_flag & VROOT)
2446 error = EBUSY;
2447 else {
2448 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2449 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2450 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2451 }
2452out:
2453 NDFREE(&nd, NDF_ONLY_PNBUF);
2454 if (nd.ni_dvp == vp)
2455 vrele(nd.ni_dvp);
2456 else
2457 vput(nd.ni_dvp);
2458 if (vp != NULLVP)
2459 vput(vp);
2460 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2461 ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2462 return (error);
2463}
2464
#ifdef COMPAT_43
/*
 * ogetdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format
 * (compatibility variant).
 *
 * Unless the big-endian fast path applies, entries are read into a
 * temporary kernel buffer, the d_type/d_namlen fields of each record are
 * rewritten to match the layout old callers expect, and the result is
 * then moved out to the user buffer.  The directory offset in effect
 * before the read is copied out through `basep'; the number of bytes
 * transferred is returned in p_retval[0].
 */
int
ogetdirentries(struct ogetdirentries_args *uap)
{
	struct proc *p = curproc;
	struct file *fp;
	struct vnode *vp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *ent, *limit;
	caddr_t kbuf;
	long loff;
	int error, eofflag, nread;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	error = getvnode(p->p_fd, SCARG(uap, fd), &fp);
	if (error != 0)
		return (error);
	if (!(fp->f_flag & FREAD))
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR)
		return (EINVAL);
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	auio.uio_offset = fp->f_offset;
	loff = auio.uio_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		/* read straight into the user buffer, no fix-up pass */
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = auio.uio_offset;
	} else
#	endif
	{
		/* Bounce through a kernel buffer so records can be edited. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(kbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = kbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			nread = SCARG(uap, count) - kuio.uio_resid;
			limit = (struct dirent *)&kbuf[nread];
			ent = (struct dirent *)kbuf;
			while (ent < limit) {
#	if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The expected low byte of
				 * ent->d_namlen is our ent->d_type.
				 * The high MBZ byte of ent->d_namlen
				 * is our ent->d_namlen.
				 */
				ent->d_type = ent->d_namlen;
				ent->d_namlen = 0;
#	else
				/*
				 * The ent->d_type is the high byte
				 * of the expected ent->d_namlen,
				 * so must be zero'ed.
				 */
				ent->d_type = 0;
#	endif
				/* a zero-length record would never advance */
				if (ent->d_reclen <= 0) {
					error = EIO;
					break;
				}
				ent = (struct dirent *)
				    ((char *)ent + ent->d_reclen);
			}
			/* only hand data to the user if the walk completed */
			if (ent >= limit)
				error = uiomove(kbuf, nread, &auio);
		}
		FREE(kbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, p);
	if (error)
		return (error);
	if (SCARG(uap, count) == auio.uio_resid) {
		/*
		 * Nothing was transferred: give the union hook, and then the
		 * MNT_UNION covered vnode, a chance to supply entries.
		 */
		if (union_dircheckp) {
			error = union_dircheckp(p, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				return (error);
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *upper = vp;

			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(upper);
			goto unionread;
		}
	}
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
#endif /* COMPAT_43 */
2584
2585/*
2586 * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
2587 *
2588 * Read a block of directory entries in a file system independent format.
2589 */
2590int
2591getdirentries(struct getdirentries_args *uap)
2592{
2593 struct proc *p = curproc;
2594 struct vnode *vp;
2595 struct file *fp;
2596 struct uio auio;
2597 struct iovec aiov;
2598 long loff;
2599 int error, eofflag;
2600
2601 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2602 return (error);
2603 if ((fp->f_flag & FREAD) == 0)
2604 return (EBADF);
2605 vp = (struct vnode *)fp->f_data;
2606unionread:
2607 if (vp->v_type != VDIR)
2608 return (EINVAL);
2609 aiov.iov_base = SCARG(uap, buf);
2610 aiov.iov_len = SCARG(uap, count);
2611 auio.uio_iov = &aiov;
2612 auio.uio_iovcnt = 1;
2613 auio.uio_rw = UIO_READ;
2614 auio.uio_segflg = UIO_USERSPACE;
2615 auio.uio_procp = p;
2616 auio.uio_resid = SCARG(uap, count);
2617 /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
2618 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2619 loff = auio.uio_offset = fp->f_offset;
2620 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
2621 fp->f_offset = auio.uio_offset;
2622 VOP_UNLOCK(vp, 0, p);
2623 if (error)
2624 return (error);
2625 if (SCARG(uap, count) == auio.uio_resid) {
2626 if (union_dircheckp) {
2627 error = union_dircheckp(p, &vp, fp);
2628 if (error == -1)
2629 goto unionread;
2630 if (error)
2631 return (error);
2632 }
2633 if ((vp->v_flag & VROOT) &&
2634 (vp->v_mount->mnt_flag & MNT_UNION)) {
2635 struct vnode *tvp = vp;
2636 vp = vp->v_mount->mnt_vnodecovered;
2637 VREF(vp);
2638 fp->f_data = (caddr_t) vp;
2639 fp->f_offset = 0;
2640 vrele(tvp);
2641 goto unionread;
2642 }
2643 }
2644 if (SCARG(uap, basep) != NULL) {
2645 error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
2646 sizeof(long));
2647 }
2648 p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2649 return (error);
2650}
2651
2652/*
2653 * getdents_args(int fd, char *buf, size_t count)
2654 */
2655int
2656getdents(struct getdents_args *uap)
2657{
2658 struct getdirentries_args ap;
2659
2660 ap.fd = uap->fd;
2661 ap.buf = uap->buf;
2662 ap.count = uap->count;
2663 ap.basep = NULL;
2664 return getdirentries(&ap);
2665}
2666
2667/*
2668 * umask(int newmask)
2669 *
2670 * Set the mode mask for creation of filesystem nodes.
2671 *
2672 * MP SAFE
2673 */
2674int
2675umask(struct umask_args *uap)
2676{
2677 struct proc *p = curproc;
2678 struct filedesc *fdp;
2679
2680 fdp = p->p_fd;
2681 p->p_retval[0] = fdp->fd_cmask;
2682 fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
2683 return (0);
2684}
2685
2686/*
2687 * revoke(char *path)
2688 *
2689 * Void all references to file by ripping underlying filesystem
2690 * away from vnode.
2691 */
2692/* ARGSUSED */
2693int
2694revoke(struct revoke_args *uap)
2695{
2696 struct proc *p = curproc;
2697 struct vnode *vp;
2698 struct vattr vattr;
2699 int error;
2700 struct nameidata nd;
2701
2702 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2703 if ((error = namei(&nd)) != 0)
2704 return (error);
2705 vp = nd.ni_vp;
2706 NDFREE(&nd, NDF_ONLY_PNBUF);
2707 if (vp->v_type != VCHR && vp->v_type != VBLK) {
2708 error = EINVAL;
2709 goto out;
2710 }
2711 if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2712 goto out;
2713 if (p->p_ucred->cr_uid != vattr.va_uid &&
2714 (error = suser_xxx(0, PRISON_ROOT)))
2715 goto out;
2716 if (vcount(vp) > 1)
2717 VOP_REVOKE(vp, REVOKEALL);
2718out:
2719 vrele(vp);
2720 return (error);
2721}
2722
2723/*
2724 * Convert a user file descriptor to a kernel file entry.
2725 */
2726int
2727getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2728{
2729 struct file *fp;
2730
2731 if ((u_int)fd >= fdp->fd_nfiles ||
2732 (fp = fdp->fd_ofiles[fd]) == NULL)
2733 return (EBADF);
2734 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
2735 return (EINVAL);
2736 *fpp = fp;
2737 return (0);
2738}
2739/*
2740 * getfh_args(char *fname, fhandle_t *fhp)
2741 *
2742 * Get (NFS) file handle
2743 */
2744int
2745getfh(struct getfh_args *uap)
2746{
2747 struct proc *p = curproc;
2748 struct nameidata nd;
2749 fhandle_t fh;
2750 struct vnode *vp;
2751 int error;
2752
2753 /*
2754 * Must be super user
2755 */
2756 error = suser();
2757 if (error)
2758 return (error);
2759 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
2760 error = namei(&nd);
2761 if (error)
2762 return (error);
2763 NDFREE(&nd, NDF_ONLY_PNBUF);
2764 vp = nd.ni_vp;
2765 bzero(&fh, sizeof(fh));
2766 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2767 error = VFS_VPTOFH(vp, &fh.fh_fid);
2768 vput(vp);
2769 if (error)
2770 return (error);
2771 error = copyout(&fh, uap->fhp, sizeof (fh));
2772 return (error);
2773}
2774
2775/*
2776 * fhopen_args(const struct fhandle *u_fhp, int flags)
2777 *
2778 * syscall for the rpc.lockd to use to translate a NFS file handle into
2779 * an open descriptor.
2780 *
2781 * warning: do not remove the suser() call or this becomes one giant
2782 * security hole.
2783 */
2784int
2785fhopen(struct fhopen_args *uap)
2786{
2787 struct proc *p = curproc;
2788 struct mount *mp;
2789 struct vnode *vp;
2790 struct fhandle fhp;
2791 struct vattr vat;
2792 struct vattr *vap = &vat;
2793 struct flock lf;
2794 struct file *fp;
2795 struct filedesc *fdp = p->p_fd;
2796 int fmode, mode, error, type;
2797 struct file *nfp;
2798 int indx;
2799
2800 /*
2801 * Must be super user
2802 */
2803 error = suser();
2804 if (error)
2805 return (error);
2806
2807 fmode = FFLAGS(SCARG(uap, flags));
2808 /* why not allow a non-read/write open for our lockd? */
2809 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
2810 return (EINVAL);
2811 error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
2812 if (error)
2813 return(error);
2814 /* find the mount point */
2815 mp = vfs_getvfs(&fhp.fh_fsid);
2816 if (mp == NULL)
2817 return (ESTALE);
2818 /* now give me my vnode, it gets returned to me locked */
2819 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
2820 if (error)
2821 return (error);
2822 /*
2823 * from now on we have to make sure not
2824 * to forget about the vnode
2825 * any error that causes an abort must vput(vp)
2826 * just set error = err and 'goto bad;'.
2827 */
2828
2829 /*
2830 * from vn_open
2831 */
2832 if (vp->v_type == VLNK) {
2833 error = EMLINK;
2834 goto bad;
2835 }
2836 if (vp->v_type == VSOCK) {
2837 error = EOPNOTSUPP;
2838 goto bad;
2839 }
2840 mode = 0;
2841 if (fmode & (FWRITE | O_TRUNC)) {
2842 if (vp->v_type == VDIR) {
2843 error = EISDIR;
2844 goto bad;
2845 }
2846 error = vn_writechk(vp);
2847 if (error)
2848 goto bad;
2849 mode |= VWRITE;
2850 }
2851 if (fmode & FREAD)
2852 mode |= VREAD;
2853 if (mode) {
2854 error = VOP_ACCESS(vp, mode, p->p_ucred, p);
2855 if (error)
2856 goto bad;
2857 }
2858 if (fmode & O_TRUNC) {
2859 VOP_UNLOCK(vp, 0, p); /* XXX */
2860 VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2861 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
2862 VATTR_NULL(vap);
2863 vap->va_size = 0;
2864 error = VOP_SETATTR(vp, vap, p->p_ucred, p);
2865 if (error)
2866 goto bad;
2867 }
2868 error = VOP_OPEN(vp, fmode, p->p_ucred, p);
2869 if (error)
2870 goto bad;
2871 /*
2872 * Make sure that a VM object is created for VMIO support.
2873 */
2874 if (vn_canvmio(vp) == TRUE) {
2875 if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
2876 goto bad;
2877 }
2878 if (fmode & FWRITE)
2879 vp->v_writecount++;
2880
2881 /*
2882 * end of vn_open code
2883 */
2884
2885 if ((error = falloc(p, &nfp, &indx)) != 0) {
2886 if (fmode & FWRITE)
2887 vp->v_writecount--;
2888 goto bad;
2889 }
2890 fp = nfp;
2891
2892 /*
2893 * hold an extra reference to avoid having fp ripped out
2894 * from under us while we block in the lock op.
2895 */
2896 fhold(fp);
2897 nfp->f_data = (caddr_t)vp;
2898 nfp->f_flag = fmode & FMASK;
2899 nfp->f_ops = &vnops;
2900 nfp->f_type = DTYPE_VNODE;
2901 if (fmode & (O_EXLOCK | O_SHLOCK)) {
2902 lf.l_whence = SEEK_SET;
2903 lf.l_start = 0;
2904 lf.l_len = 0;
2905 if (fmode & O_EXLOCK)
2906 lf.l_type = F_WRLCK;
2907 else
2908 lf.l_type = F_RDLCK;
2909 type = F_FLOCK;
2910 if ((fmode & FNONBLOCK) == 0)
2911 type |= F_WAIT;
2912 VOP_UNLOCK(vp, 0, p);
2913 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
2914 /*
2915 * lock request failed. Normally close the descriptor
2916 * but handle the case where someone might have dup()d
2917 * or close()d it when we weren't looking.
2918 */
2919 if (fdp->fd_ofiles[indx] == fp) {
2920 fdp->fd_ofiles[indx] = NULL;
2921 fdrop(fp, p);
2922 }
2923
2924 /*
2925 * release our private reference.
2926 */
2927 fdrop(fp, p);
2928 return (error);
2929 }
2930 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2931 fp->f_flag |= FHASLOCK;
2932 }
2933 if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
2934 vfs_object_create(vp, p, p->p_ucred);
2935
2936 VOP_UNLOCK(vp, 0, p);
2937 fdrop(fp, p);
2938 p->p_retval[0] = indx;
2939 return (0);
2940
2941bad:
2942 vput(vp);
2943 return (error);
2944}
2945
2946/*
2947 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
2948 */
2949int
2950fhstat(struct fhstat_args *uap)
2951{
2952 struct proc *p = curproc;
2953 struct stat sb;
2954 fhandle_t fh;
2955 struct mount *mp;
2956 struct vnode *vp;
2957 int error;
2958
2959 /*
2960 * Must be super user
2961 */
2962 error = suser();
2963 if (error)
2964 return (error);
2965
2966 error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
2967 if (error)
2968 return (error);
2969
2970 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
2971 return (ESTALE);
2972 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
2973 return (error);
2974 error = vn_stat(vp, &sb, p);
2975 vput(vp);
2976 if (error)
2977 return (error);
2978 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
2979 return (error);
2980}
2981
2982/*
2983 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
2984 */
2985int
2986fhstatfs(struct fhstatfs_args *uap)
2987{
2988 struct proc *p = curproc;
2989 struct statfs *sp;
2990 struct mount *mp;
2991 struct vnode *vp;
2992 struct statfs sb;
2993 fhandle_t fh;
2994 int error;
2995
2996 /*
2997 * Must be super user
2998 */
2999 if ((error = suser()))
3000 return (error);
3001
3002 if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3003 return (error);
3004
3005 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3006 return (ESTALE);
3007 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3008 return (error);
3009 mp = vp->v_mount;
3010 sp = &mp->mnt_stat;
3011 vput(vp);
3012 if ((error = VFS_STATFS(mp, sp, p)) != 0)
3013 return (error);
3014 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3015 if (suser_xxx(p->p_ucred, 0)) {
3016 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3017 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3018 sp = &sb;
3019 }
3020 return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3021}
3022
3023/*
3024 * Syscall to push extended attribute configuration information into the
3025 * VFS. Accepts a path, which it converts to a mountpoint, as well as
3026 * a command (int cmd), and attribute name and misc data. For now, the
3027 * attribute name is left in userspace for consumption by the VFS_op.
3028 * It will probably be changed to be copied into sysspace by the
3029 * syscall in the future, once issues with various consumers of the
3030 * attribute code have raised their hands.
3031 *
3032 * Currently this is used only by UFS Extended Attributes.
3033 */
3034int
3035extattrctl(struct extattrctl_args *uap)
3036{
3037 struct proc *p = curproc;
3038 struct nameidata nd;
3039 struct mount *mp;
3040 int error;
3041
3042 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3043 if ((error = namei(&nd)) != 0)
3044 return (error);
3045 mp = nd.ni_vp->v_mount;
3046 NDFREE(&nd, 0);
3047 return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
3048 SCARG(uap, arg), p));
3049}
3050
3051/*
3052 * Syscall to set a named extended attribute on a file or directory.
3053 * Accepts attribute name, and a uio structure pointing to the data to set.
3054 * The uio is consumed in the style of writev(). The real work happens
3055 * in VOP_SETEXTATTR().
3056 */
3057int
3058extattr_set_file(struct extattr_set_file_args *uap)
3059{
3060 struct proc *p = curproc;
3061 struct nameidata nd;
3062 struct uio auio;
3063 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3064 char attrname[EXTATTR_MAXNAMELEN];
3065 u_int iovlen, cnt;
3066 int error, i;
3067
3068 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3069 if (error)
3070 return (error);
3071 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3072 SCARG(uap, path), p);
3073 if ((error = namei(&nd)) != 0)
3074 return(error);
3075 iovlen = uap->iovcnt * sizeof(struct iovec);
3076 if (uap->iovcnt > UIO_SMALLIOV) {
3077 if (uap->iovcnt > UIO_MAXIOV) {
3078 error = EINVAL;
3079 goto done;
3080 }
3081 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3082 needfree = iov;
3083 } else
3084 iov = aiov;
3085 auio.uio_iov = iov;
3086 auio.uio_iovcnt = uap->iovcnt;
3087 auio.uio_rw = UIO_WRITE;
3088 auio.uio_segflg = UIO_USERSPACE;
3089 auio.uio_procp = p;
3090 auio.uio_offset = 0;
3091 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3092 goto done;
3093 auio.uio_resid = 0;
3094 for (i = 0; i < uap->iovcnt; i++) {
3095 if (iov->iov_len > INT_MAX - auio.uio_resid) {
3096 error = EINVAL;
3097 goto done;
3098 }
3099 auio.uio_resid += iov->iov_len;
3100 iov++;
3101 }
3102 cnt = auio.uio_resid;
3103 error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, p);
3104 cnt -= auio.uio_resid;
3105 p->p_retval[0] = cnt;
3106done:
3107 if (needfree)
3108 FREE(needfree, M_IOV);
3109 NDFREE(&nd, 0);
3110 return (error);
3111}
3112
3113/*
3114 * Syscall to get a named extended attribute on a file or directory.
3115 * Accepts attribute name, and a uio structure pointing to a buffer for the
3116 * data. The uio is consumed in the style of readv(). The real work
3117 * happens in VOP_GETEXTATTR();
3118 */
3119int
3120extattr_get_file(struct extattr_get_file_args *uap)
3121{
3122 struct proc *p = curproc;
3123 struct nameidata nd;
3124 struct uio auio;
3125 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
3126 char attrname[EXTATTR_MAXNAMELEN];
3127 u_int iovlen, cnt;
3128 int error, i;
3129
3130 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3131 if (error)
3132 return (error);
3133 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3134 SCARG(uap, path), p);
3135 if ((error = namei(&nd)) != 0)
3136 return (error);
3137 iovlen = uap->iovcnt * sizeof (struct iovec);
3138 if (uap->iovcnt > UIO_SMALLIOV) {
3139 if (uap->iovcnt > UIO_MAXIOV) {
3140 NDFREE(&nd, 0);
3141 return (EINVAL);
3142 }
3143 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3144 needfree = iov;
3145 } else {
3146 iov = aiov;
3147 needfree = NULL;
3148 }
3149 auio.uio_iov = iov;
3150 auio.uio_iovcnt = uap->iovcnt;
3151 auio.uio_rw = UIO_READ;
3152 auio.uio_segflg = UIO_USERSPACE;
3153 auio.uio_procp = p;
3154 auio.uio_offset = 0;
3155 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3156 goto done;
3157 auio.uio_resid = 0;
3158 for (i = 0; i < uap->iovcnt; i++) {
3159 if (iov->iov_len > INT_MAX - auio.uio_resid) {
3160 error = EINVAL;
3161 goto done;
3162 }
3163 auio.uio_resid += iov->iov_len;
3164 iov++;
3165 }
3166 cnt = auio.uio_resid;
3167 error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, p);
3168 cnt -= auio.uio_resid;
3169 p->p_retval[0] = cnt;
3170done:
3171 if (needfree)
3172 FREE(needfree, M_IOV);
3173 NDFREE(&nd, 0);
3174 return(error);
3175}
3176
3177/*
3178 * Syscall to delete a named extended attribute from a file or directory.
3179 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
3180 */
3181int
3182extattr_delete_file(struct extattr_delete_file_args *uap)
3183{
3184 struct proc *p = curproc;
3185 struct nameidata nd;
3186 char attrname[EXTATTR_MAXNAMELEN];
3187 int error;
3188
3189 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3190 if (error)
3191 return(error);
3192 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3193 SCARG(uap, path), p);
3194 if ((error = namei(&nd)) != 0)
3195 return(error);
3196 error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_ucred, p);
3197 NDFREE(&nd, 0);
3198 return(error);
3199}