VNode sequencing and locking - part 2/4.
[dragonfly.git] / sys / kern / vfs_syscalls.c
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.99 2006/08/11 01:54:59 dillon Exp $
41 */
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/buf.h>
46#include <sys/conf.h>
47#include <sys/sysent.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/mountctl.h>
51#include <sys/sysproto.h>
52#include <sys/filedesc.h>
53#include <sys/kernel.h>
54#include <sys/fcntl.h>
55#include <sys/file.h>
56#include <sys/linker.h>
57#include <sys/stat.h>
58#include <sys/unistd.h>
59#include <sys/vnode.h>
60#include <sys/proc.h>
61#include <sys/namei.h>
62#include <sys/nlookup.h>
63#include <sys/dirent.h>
64#include <sys/extattr.h>
65#include <sys/spinlock.h>
66#include <sys/kern_syscall.h>
67#include <sys/objcache.h>
68
69#include <machine/limits.h>
70#include <vfs/union/union.h>
71#include <sys/sysctl.h>
72#include <vm/vm.h>
73#include <vm/vm_object.h>
74#include <vm/vm_page.h>
75
76#include <sys/file2.h>
77#include <sys/spinlock2.h>
78
79static int checkvp_chdir (struct vnode *vn, struct thread *td);
80static void checkdirs (struct vnode *olddp, struct namecache *ncp);
81static int chroot_refuse_vdir_fds (struct filedesc *fdp);
82static int chroot_visible_mnt(struct mount *mp, struct proc *p);
83static int getutimes (const struct timeval *, struct timespec *);
84static int setfown (struct vnode *, uid_t, gid_t);
85static int setfmode (struct vnode *, int);
86static int setfflags (struct vnode *, int);
87static int setutimes (struct vnode *, const struct timespec *, int);
88static int usermount = 0; /* if 1, non-root can mount fs. */
89
90int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);
91
92SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
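
/*
 * Illustrative userland sketch (not part of this source file): reading the
 * vfs.usermount knob declared above through the standard sysctl(3)
 * interface.  A non-zero value lets unprivileged callers into sys_mount().
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

static void
usermount_example(void)
{
	int val;
	size_t len = sizeof(val);

	if (sysctlbyname("vfs.usermount", &val, &len, NULL, 0) < 0)
		err(1, "sysctlbyname");
	printf("vfs.usermount = %d\n", val);
}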
93
94/*
95 * Virtual File System System Calls
96 */
97
98/*
99 * Mount a file system.
100 */
101/*
102 * mount_args(char *type, char *path, int flags, caddr_t data)
103 */
104/* ARGSUSED */
105int
106sys_mount(struct mount_args *uap)
107{
108 struct thread *td = curthread;
109 struct proc *p = td->td_proc;
110 struct vnode *vp;
111 struct namecache *ncp;
112 struct mount *mp;
113 struct vfsconf *vfsp;
114 int error, flag = 0, flag2 = 0;
115 struct vattr va;
116 struct nlookupdata nd;
117 char fstypename[MFSNAMELEN];
118 struct nlcomponent nlc;
119 struct ucred *cred = p->p_ucred;
120
121 KKASSERT(p);
122 if (cred->cr_prison != NULL)
123 return (EPERM);
124 if (usermount == 0 && (error = suser(td)))
125 return (error);
126 /*
127 * Do not allow NFS export by non-root users.
128 */
129 if (uap->flags & MNT_EXPORTED) {
130 error = suser(td);
131 if (error)
132 return (error);
133 }
134 /*
135 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
136 */
137 if (suser(td))
138 uap->flags |= MNT_NOSUID | MNT_NODEV;
139
140 /*
141 * Lookup the requested path and extract the ncp and vnode.
142 */
143 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
144 if (error == 0) {
145 if ((error = nlookup(&nd)) == 0) {
146 if (nd.nl_ncp->nc_vp == NULL)
147 error = ENOENT;
148 }
149 }
150 if (error) {
151 nlookup_done(&nd);
152 return (error);
153 }
154
155 /*
156 * Extract the locked+refd ncp and cleanup the nd structure
157 */
158 ncp = nd.nl_ncp;
159 nd.nl_ncp = NULL;
160 nlookup_done(&nd);
161
162 /*
 163	 * Now we have the locked ref'd ncp and an unreferenced vnode.
164 */
165 vp = ncp->nc_vp;
166 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
167 cache_put(ncp);
168 return (error);
169 }
170 cache_unlock(ncp);
171
172 /*
173 * Now we have an unlocked ref'd ncp and a locked ref'd vp
174 */
175 if (uap->flags & MNT_UPDATE) {
176 if ((vp->v_flag & VROOT) == 0) {
177 cache_drop(ncp);
178 vput(vp);
179 return (EINVAL);
180 }
181 mp = vp->v_mount;
182 flag = mp->mnt_flag;
183 flag2 = mp->mnt_kern_flag;
184 /*
185 * We only allow the filesystem to be reloaded if it
186 * is currently mounted read-only.
187 */
188 if ((uap->flags & MNT_RELOAD) &&
189 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
190 cache_drop(ncp);
191 vput(vp);
192 return (EOPNOTSUPP); /* Needs translation */
193 }
194 /*
 195	 * Only root, or the user that did the original mount, is
196 * permitted to update it.
197 */
198 if (mp->mnt_stat.f_owner != cred->cr_uid &&
199 (error = suser(td))) {
200 cache_drop(ncp);
201 vput(vp);
202 return (error);
203 }
204 if (vfs_busy(mp, LK_NOWAIT)) {
205 cache_drop(ncp);
206 vput(vp);
207 return (EBUSY);
208 }
209 if ((vp->v_flag & VMOUNT) != 0 ||
210 vp->v_mountedhere != NULL) {
211 cache_drop(ncp);
212 vfs_unbusy(mp);
213 vput(vp);
214 return (EBUSY);
215 }
216 vp->v_flag |= VMOUNT;
217 mp->mnt_flag |=
218 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
219 VOP_UNLOCK(vp, 0);
220 goto update;
221 }
222 /*
223 * If the user is not root, ensure that they own the directory
224 * onto which we are attempting to mount.
225 */
226 if ((error = VOP_GETATTR(vp, &va)) ||
227 (va.va_uid != cred->cr_uid && (error = suser(td)))) {
228 cache_drop(ncp);
229 vput(vp);
230 return (error);
231 }
232 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
233 cache_drop(ncp);
234 vput(vp);
235 return (error);
236 }
237 if (vp->v_type != VDIR) {
238 cache_drop(ncp);
239 vput(vp);
240 return (ENOTDIR);
241 }
242 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
243 cache_drop(ncp);
244 vput(vp);
245 return (error);
246 }
247 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
248 if (!strcmp(vfsp->vfc_name, fstypename))
249 break;
250 }
251 if (vfsp == NULL) {
252 linker_file_t lf;
253
254 /* Only load modules for root (very important!) */
255 if ((error = suser(td)) != 0) {
256 cache_drop(ncp);
257 vput(vp);
258 return error;
259 }
260 error = linker_load_file(fstypename, &lf);
261 if (error || lf == NULL) {
262 cache_drop(ncp);
263 vput(vp);
264 if (lf == NULL)
265 error = ENODEV;
266 return error;
267 }
268 lf->userrefs++;
269 /* lookup again, see if the VFS was loaded */
270 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
271 if (!strcmp(vfsp->vfc_name, fstypename))
272 break;
273 }
274 if (vfsp == NULL) {
275 lf->userrefs--;
276 linker_file_unload(lf);
277 cache_drop(ncp);
278 vput(vp);
279 return (ENODEV);
280 }
281 }
282 if ((vp->v_flag & VMOUNT) != 0 ||
283 vp->v_mountedhere != NULL) {
284 cache_drop(ncp);
285 vput(vp);
286 return (EBUSY);
287 }
288 vp->v_flag |= VMOUNT;
289
290 /*
291 * Allocate and initialize the filesystem.
292 */
293 mp = malloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
294 TAILQ_INIT(&mp->mnt_nvnodelist);
295 TAILQ_INIT(&mp->mnt_reservedvnlist);
296 TAILQ_INIT(&mp->mnt_jlist);
297 mp->mnt_nvnodelistsize = 0;
298 lockinit(&mp->mnt_lock, "vfslock", 0, 0);
299 vfs_busy(mp, LK_NOWAIT);
300 mp->mnt_op = vfsp->vfc_vfsops;
301 mp->mnt_vfc = vfsp;
302 vfsp->vfc_refcount++;
303 mp->mnt_stat.f_type = vfsp->vfc_typenum;
304 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
305 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
306 mp->mnt_vnodecovered = vp;
307 mp->mnt_stat.f_owner = cred->cr_uid;
308 mp->mnt_iosize_max = DFLTPHYS;
309 VOP_UNLOCK(vp, 0);
310update:
311 /*
312 * Set the mount level flags.
313 */
314 if (uap->flags & MNT_RDONLY)
315 mp->mnt_flag |= MNT_RDONLY;
316 else if (mp->mnt_flag & MNT_RDONLY)
317 mp->mnt_kern_flag |= MNTK_WANTRDWR;
318 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
319 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
320 MNT_NOSYMFOLLOW | MNT_IGNORE |
321 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
322 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
323 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
324 MNT_NOSYMFOLLOW | MNT_IGNORE |
325 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
326 /*
327 * Mount the filesystem.
328 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
329 * get.
330 */
331 error = VFS_MOUNT(mp, uap->path, uap->data, cred);
332 if (mp->mnt_flag & MNT_UPDATE) {
333 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
334 mp->mnt_flag &= ~MNT_RDONLY;
335 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
336 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
337 if (error) {
338 mp->mnt_flag = flag;
339 mp->mnt_kern_flag = flag2;
340 }
341 vfs_unbusy(mp);
342 vp->v_flag &= ~VMOUNT;
343 vrele(vp);
344 cache_drop(ncp);
345 return (error);
346 }
347 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
348 /*
349 * Put the new filesystem on the mount list after root. The mount
350 * point gets its own mnt_ncp which is a special ncp linking the
351 * vnode-under to the root of the new mount. The lookup code
352 * detects the mount point going forward and detects the special
353 * mnt_ncp via NCP_MOUNTPT going backwards.
354 *
355 * It is not necessary to invalidate or purge the vnode underneath
356 * because elements under the mount will be given their own glue
357 * namecache record.
358 */
359 if (!error) {
360 nlc.nlc_nameptr = "";
361 nlc.nlc_namelen = 0;
362 mp->mnt_ncp = cache_nlookup(ncp, &nlc);
363 cache_setunresolved(mp->mnt_ncp);
364 mp->mnt_ncp->nc_flag |= NCF_MOUNTPT;
365 mp->mnt_ncp->nc_mount = mp;
366 cache_drop(ncp);
367 /* XXX get the root of the fs and cache_setvp(mnt_ncp...) */
368 vp->v_flag &= ~VMOUNT;
369 vp->v_mountedhere = mp;
370 mountlist_insert(mp, MNTINS_LAST);
371 checkdirs(vp, mp->mnt_ncp);
372 cache_unlock(mp->mnt_ncp); /* leave ref intact */
373 VOP_UNLOCK(vp, 0);
374 error = vfs_allocate_syncvnode(mp);
375 vfs_unbusy(mp);
376 if ((error = VFS_START(mp, 0)) != 0)
377 vrele(vp);
378 } else {
379 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
380 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
381 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
382 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
383 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
384 vp->v_flag &= ~VMOUNT;
385 mp->mnt_vfc->vfc_refcount--;
386 vfs_unbusy(mp);
387 free(mp, M_MOUNT);
388 cache_drop(ncp);
389 vput(vp);
390 }
391 return (error);
392}
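
/*
 * Illustrative userland sketch (not part of this source file): the call shape
 * that enters sys_mount() above.  "struct example_args", "examplefs" and the
 * paths are hypothetical stand-ins; the opaque "caddr_t data" argument is
 * interpreted by the individual VFS, each of which defines its own argument
 * structure.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <err.h>

struct example_args {			/* hypothetical fs-specific mount data */
	char	*fspec;			/* device or backing store */
};

static void
mount_example(void)
{
	struct example_args args = { "/dev/somedisk" };	/* hypothetical */

	/*
	 * Unprivileged callers are rejected unless vfs.usermount is set,
	 * and MNT_NOSUID/MNT_NODEV are silently forced on for them.
	 */
	if (mount("examplefs", "/mnt", MNT_RDONLY, &args) < 0)
		err(1, "mount");
}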
393
394/*
395 * Scan all active processes to see if any of them have a current
396 * or root directory onto which the new filesystem has just been
397 * mounted. If so, replace them with the new mount point.
398 *
399 * The passed ncp is ref'd and locked (from the mount code) and
400 * must be associated with the vnode representing the root of the
401 * mount point.
402 */
403struct checkdirs_info {
404 struct vnode *olddp;
405 struct vnode *newdp;
406 struct namecache *ncp;
407};
408
409static int checkdirs_callback(struct proc *p, void *data);
410
411static void
412checkdirs(struct vnode *olddp, struct namecache *ncp)
413{
414 struct checkdirs_info info;
415 struct vnode *newdp;
416 struct mount *mp;
417
418 if (olddp->v_usecount == 1)
419 return;
420 mp = olddp->v_mountedhere;
421 if (VFS_ROOT(mp, &newdp))
422 panic("mount: lost mount");
423 cache_setvp(ncp, newdp);
424
425 if (rootvnode == olddp) {
426 vref(newdp);
427 vfs_cache_setroot(newdp, cache_hold(ncp));
428 }
429
430 info.olddp = olddp;
431 info.newdp = newdp;
432 info.ncp = ncp;
433 allproc_scan(checkdirs_callback, &info);
434 vput(newdp);
435}
436
437/*
438 * NOTE: callback is not MP safe because the scanned process's filedesc
 439 * structure can be ripped out from under us, among other things.
440 */
441static int
442checkdirs_callback(struct proc *p, void *data)
443{
444 struct checkdirs_info *info = data;
445 struct filedesc *fdp;
446 struct namecache *ncdrop1;
447 struct namecache *ncdrop2;
448 struct vnode *vprele1;
449 struct vnode *vprele2;
450
451 if ((fdp = p->p_fd) != NULL) {
452 ncdrop1 = NULL;
453 ncdrop2 = NULL;
454 vprele1 = NULL;
455 vprele2 = NULL;
456
457 /*
458 * MPUNSAFE - XXX fdp can be pulled out from under a
459 * foreign process.
460 *
461 * A shared filedesc is ok, we don't have to copy it
462 * because we are making this change globally.
463 */
464 spin_lock_wr(&fdp->fd_spin);
465 if (fdp->fd_cdir == info->olddp) {
466 vprele1 = fdp->fd_cdir;
467 vref(info->newdp);
468 fdp->fd_cdir = info->newdp;
469 ncdrop1 = fdp->fd_ncdir;
470 fdp->fd_ncdir = cache_hold(info->ncp);
471 }
472 if (fdp->fd_rdir == info->olddp) {
473 vprele2 = fdp->fd_rdir;
474 vref(info->newdp);
475 fdp->fd_rdir = info->newdp;
476 ncdrop2 = fdp->fd_nrdir;
477 fdp->fd_nrdir = cache_hold(info->ncp);
478 }
479 spin_unlock_wr(&fdp->fd_spin);
480 if (ncdrop1)
481 cache_drop(ncdrop1);
482 if (ncdrop2)
483 cache_drop(ncdrop2);
484 if (vprele1)
485 vrele(vprele1);
486 if (vprele2)
487 vrele(vprele2);
488 }
489 return(0);
490}
491
492/*
493 * Unmount a file system.
494 *
495 * Note: unmount takes a path to the vnode mounted on as argument,
 496 * not the special file (as before).
497 */
498/*
499 * umount_args(char *path, int flags)
500 */
501/* ARGSUSED */
502int
503sys_unmount(struct unmount_args *uap)
504{
505 struct thread *td = curthread;
506 struct proc *p = td->td_proc;
507 struct mount *mp = NULL;
508 int error;
509 struct nlookupdata nd;
510
511 KKASSERT(p);
512 if (p->p_ucred->cr_prison != NULL)
513 return (EPERM);
514 if (usermount == 0 && (error = suser(td)))
515 return (error);
516
517 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
518 if (error == 0)
519 error = nlookup(&nd);
520 if (error)
521 goto out;
522
523 mp = nd.nl_ncp->nc_mount;
524
525 /*
 526	 * Only root, or the user that did the original mount, is
527 * permitted to unmount this filesystem.
528 */
529 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
530 (error = suser(td)))
531 goto out;
532
533 /*
534 * Don't allow unmounting the root file system.
535 */
536 if (mp->mnt_flag & MNT_ROOTFS) {
537 error = EINVAL;
538 goto out;
539 }
540
541 /*
542 * Must be the root of the filesystem
543 */
544 if (! (nd.nl_ncp->nc_flag & NCF_MOUNTPT)) {
545 error = EINVAL;
546 goto out;
547 }
548
549out:
550 nlookup_done(&nd);
551 if (error)
552 return (error);
553 return (dounmount(mp, uap->flags));
554}
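
/*
 * Illustrative userland sketch (not part of this source file): unmount(2)
 * takes the path of the mount point itself (enforced by the NCF_MOUNTPT
 * check above), not the special device that was mounted on it.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <err.h>

static void
unmount_example(void)
{
	if (unmount("/mnt", 0) < 0)	/* "/mnt", not "/dev/..." */
		err(1, "unmount");
	/* MNT_FORCE would request a forced unmount; see dounmount(). */
}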
555
556/*
557 * Do the actual file system unmount.
558 */
559static int
560dounmount_interlock(struct mount *mp)
561{
562 if (mp->mnt_kern_flag & MNTK_UNMOUNT)
563 return (EBUSY);
564 mp->mnt_kern_flag |= MNTK_UNMOUNT;
565 return(0);
566}
567
568int
569dounmount(struct mount *mp, int flags)
570{
571 struct vnode *coveredvp;
572 int error;
573 int async_flag;
574 int lflags;
575
576 /*
577 * Exclusive access for unmounting purposes
578 */
579 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
580 return (error);
581
582 /*
583 * Allow filesystems to detect that a forced unmount is in progress.
584 */
585 if (flags & MNT_FORCE)
586 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
587 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
588 error = lockmgr(&mp->mnt_lock, lflags);
589 if (error) {
590 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
591 if (mp->mnt_kern_flag & MNTK_MWAIT)
592 wakeup(mp);
593 return (error);
594 }
595
596 if (mp->mnt_flag & MNT_EXPUBLIC)
597 vfs_setpublicfs(NULL, NULL, NULL);
598
599 vfs_msync(mp, MNT_WAIT);
600 async_flag = mp->mnt_flag & MNT_ASYNC;
601 mp->mnt_flag &=~ MNT_ASYNC;
602 cache_purgevfs(mp); /* remove cache entries for this file sys */
603 if (mp->mnt_syncer != NULL)
604 vrele(mp->mnt_syncer);
605 if (((mp->mnt_flag & MNT_RDONLY) ||
606 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
607 (flags & MNT_FORCE))
608 error = VFS_UNMOUNT(mp, flags);
609 if (error) {
610 if (mp->mnt_syncer == NULL)
611 vfs_allocate_syncvnode(mp);
612 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
613 mp->mnt_flag |= async_flag;
614 lockmgr(&mp->mnt_lock, LK_RELEASE);
615 if (mp->mnt_kern_flag & MNTK_MWAIT)
616 wakeup(mp);
617 return (error);
618 }
619 /*
620 * Clean up any journals still associated with the mount after
621 * filesystem activity has ceased.
622 */
623 journal_remove_all_journals(mp,
624 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
625
626 mountlist_remove(mp);
627
628 /*
629 * Remove any installed vnode ops here so the individual VFSs don't
630 * have to.
631 */
632 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
633 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
634 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
635 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
636 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
637
638 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
639 coveredvp->v_mountedhere = NULL;
640 vrele(coveredvp);
641 cache_drop(mp->mnt_ncp);
642 mp->mnt_ncp = NULL;
643 }
644 mp->mnt_vfc->vfc_refcount--;
645 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
646 panic("unmount: dangling vnode");
647 lockmgr(&mp->mnt_lock, LK_RELEASE);
648 if (mp->mnt_kern_flag & MNTK_MWAIT)
649 wakeup(mp);
650 free(mp, M_MOUNT);
651 return (0);
652}
653
654/*
655 * Sync each mounted filesystem.
656 */
657
658#ifdef DEBUG
659static int syncprt = 0;
660SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
661#endif /* DEBUG */
662
663static int sync_callback(struct mount *mp, void *data);
664
665/* ARGSUSED */
666int
667sys_sync(struct sync_args *uap)
668{
669 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
670#ifdef DEBUG
671 /*
672 * print out buffer pool stat information on each sync() call.
673 */
674 if (syncprt)
675 vfs_bufstats();
676#endif /* DEBUG */
677 return (0);
678}
679
680static
681int
682sync_callback(struct mount *mp, void *data __unused)
683{
684 int asyncflag;
685
686 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
687 asyncflag = mp->mnt_flag & MNT_ASYNC;
688 mp->mnt_flag &= ~MNT_ASYNC;
689 vfs_msync(mp, MNT_NOWAIT);
690 VFS_SYNC(mp, MNT_NOWAIT);
691 mp->mnt_flag |= asyncflag;
692 }
693 return(0);
694}
695
696/* XXX PRISON: could be per prison flag */
697static int prison_quotas;
698#if 0
699SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
700#endif
701
702/*
703 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
704 *
705 * Change filesystem quotas.
706 */
707/* ARGSUSED */
708int
709sys_quotactl(struct quotactl_args *uap)
710{
711 struct nlookupdata nd;
712 struct thread *td;
713 struct proc *p;
714 struct mount *mp;
715 int error;
716
717 td = curthread;
718 p = td->td_proc;
719 if (p->p_ucred->cr_prison && !prison_quotas)
720 return (EPERM);
721
722 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
723 if (error == 0)
724 error = nlookup(&nd);
725 if (error == 0) {
726 mp = nd.nl_ncp->nc_mount;
727 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
728 uap->arg, nd.nl_cred);
729 }
730 nlookup_done(&nd);
731 return (error);
732}
733
734/*
735 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
736 * void *buf, int buflen)
737 *
738 * This function operates on a mount point and executes the specified
739 * operation using the specified control data, and possibly returns data.
740 *
741 * The actual number of bytes stored in the result buffer is returned, 0
742 * if none, otherwise an error is returned.
743 */
744/* ARGSUSED */
745int
746sys_mountctl(struct mountctl_args *uap)
747{
748 struct thread *td = curthread;
749 struct proc *p = td->td_proc;
750 struct file *fp;
751 void *ctl = NULL;
752 void *buf = NULL;
753 char *path = NULL;
754 int error;
755
756 /*
757 * Sanity and permissions checks. We must be root.
758 */
759 KKASSERT(p);
760 if (p->p_ucred->cr_prison != NULL)
761 return (EPERM);
762 if ((error = suser(td)) != 0)
763 return (error);
764
765 /*
766 * Argument length checks
767 */
768 if (uap->ctllen < 0 || uap->ctllen > 1024)
769 return (EINVAL);
770 if (uap->buflen < 0 || uap->buflen > 16 * 1024)
771 return (EINVAL);
772 if (uap->path == NULL)
773 return (EINVAL);
774
775 /*
776 * Allocate the necessary buffers and copyin data
777 */
778 path = objcache_get(namei_oc, M_WAITOK);
779 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
780 if (error)
781 goto done;
782
783 if (uap->ctllen) {
784 ctl = malloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
785 error = copyin(uap->ctl, ctl, uap->ctllen);
786 if (error)
787 goto done;
788 }
789 if (uap->buflen)
790 buf = malloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
791
792 /*
793 * Validate the descriptor
794 */
795 fp = holdfp(p->p_fd, uap->fd, -1);
796 if (fp == NULL) {
797 error = EBADF;
798 goto done;
799 }
800
801 /*
802 * Execute the internal kernel function and clean up.
803 */
804 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
805 if (fp)
806 fdrop(fp);
807 if (error == 0 && uap->sysmsg_result > 0)
808 error = copyout(buf, uap->buf, uap->sysmsg_result);
809done:
810 if (path)
811 objcache_put(namei_oc, path);
812 if (ctl)
813 free(ctl, M_TEMP);
814 if (buf)
815 free(buf, M_TEMP);
816 return (error);
817}
818
819/*
820 * Execute a mount control operation by resolving the path to a mount point
821 * and calling vop_mountctl().
822 */
823int
824kern_mountctl(const char *path, int op, struct file *fp,
825 const void *ctl, int ctllen,
826 void *buf, int buflen, int *res)
827{
828 struct vnode *vp;
829 struct mount *mp;
830 struct nlookupdata nd;
831 int error;
832
833 *res = 0;
834 vp = NULL;
835 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
836 if (error == 0)
837 error = nlookup(&nd);
838 if (error == 0)
839 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
840 nlookup_done(&nd);
841 if (error)
842 return (error);
843
844 mp = vp->v_mount;
845
846 /*
847 * Must be the root of the filesystem
848 */
849 if ((vp->v_flag & VROOT) == 0) {
850 vput(vp);
851 return (EINVAL);
852 }
853 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
854 buf, buflen, res);
855 vput(vp);
856 return (error);
857}
858
859int
860kern_statfs(struct nlookupdata *nd, struct statfs *buf)
861{
862 struct thread *td = curthread;
863 struct proc *p = td->td_proc;
864 struct mount *mp;
865 struct statfs *sp;
866 char *fullpath, *freepath;
867 int error;
868
869 if ((error = nlookup(nd)) != 0)
870 return (error);
871 mp = nd->nl_ncp->nc_mount;
872 sp = &mp->mnt_stat;
873 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
874 return (error);
875
876 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath);
877 if (error)
878 return(error);
879 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
880 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
881 free(freepath, M_TEMP);
882
883 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
884 bcopy(sp, buf, sizeof(*buf));
885 /* Only root should have access to the fsid's. */
886 if (suser(td))
887 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
888 return (0);
889}
890
891/*
892 * statfs_args(char *path, struct statfs *buf)
893 *
894 * Get filesystem statistics.
895 */
896int
897sys_statfs(struct statfs_args *uap)
898{
899 struct nlookupdata nd;
900 struct statfs buf;
901 int error;
902
903 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
904 if (error == 0)
905 error = kern_statfs(&nd, &buf);
906 nlookup_done(&nd);
907 if (error == 0)
908 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
909 return (error);
910}
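
/*
 * Illustrative userland sketch (not part of this source file): statfs(2) as
 * serviced by kern_statfs() above.  Note that for unprivileged callers the
 * kernel zeroes f_fsid before the copyout.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <err.h>
#include <stdio.h>

static void
statfs_example(const char *path)
{
	struct statfs sfs;

	if (statfs(path, &sfs) < 0)
		err(1, "statfs");
	printf("%s: %s filesystem mounted on %s\n",
	    path, sfs.f_fstypename, sfs.f_mntonname);
}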
911
912int
913kern_fstatfs(int fd, struct statfs *buf)
914{
915 struct thread *td = curthread;
916 struct proc *p = td->td_proc;
917 struct file *fp;
918 struct mount *mp;
919 struct statfs *sp;
920 char *fullpath, *freepath;
921 int error;
922
923 KKASSERT(p);
924 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
925 return (error);
926 mp = ((struct vnode *)fp->f_data)->v_mount;
927 if (mp == NULL) {
928 error = EBADF;
929 goto done;
930 }
931 if (fp->f_cred == NULL) {
932 error = EINVAL;
933 goto done;
934 }
935 sp = &mp->mnt_stat;
936 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
937 goto done;
938
939 if ((error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath)) != 0)
940 goto done;
941 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
942 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
943 free(freepath, M_TEMP);
944
945 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
946 bcopy(sp, buf, sizeof(*buf));
947
948 /* Only root should have access to the fsid's. */
949 if (suser(td))
950 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
951 error = 0;
952done:
953 fdrop(fp);
954 return (error);
955}
956
957/*
958 * fstatfs_args(int fd, struct statfs *buf)
959 *
960 * Get filesystem statistics.
961 */
962int
963sys_fstatfs(struct fstatfs_args *uap)
964{
965 struct statfs buf;
966 int error;
967
968 error = kern_fstatfs(uap->fd, &buf);
969
970 if (error == 0)
971 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
972 return (error);
973}
974
975/*
976 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
977 *
978 * Get statistics on all filesystems.
979 */
980
981struct getfsstat_info {
982 struct statfs *sfsp;
983 long count;
984 long maxcount;
985 int error;
986 int flags;
987 int is_chrooted;
988 struct proc *p;
989};
990
991static int getfsstat_callback(struct mount *, void *);
992
993/* ARGSUSED */
994int
995sys_getfsstat(struct getfsstat_args *uap)
996{
997 struct thread *td = curthread;
998 struct proc *p = td->td_proc;
999 struct getfsstat_info info;
1000
1001 bzero(&info, sizeof(info));
1002 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0)
1003 info.is_chrooted = 1;
1004 else
1005 info.is_chrooted = 0;
1006
1007 info.maxcount = uap->bufsize / sizeof(struct statfs);
1008 info.sfsp = uap->buf;
1009 info.count = 0;
1010 info.flags = uap->flags;
1011 info.p = p;
1012
1013 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
1014 if (info.sfsp && info.count > info.maxcount)
1015 uap->sysmsg_result = info.maxcount;
1016 else
1017 uap->sysmsg_result = info.count;
1018 return (info.error);
1019}
1020
1021static int
1022getfsstat_callback(struct mount *mp, void *data)
1023{
1024 struct getfsstat_info *info = data;
1025 struct statfs *sp;
1026 char *freepath;
1027 char *fullpath;
1028 int error;
1029
1030 if (info->sfsp && info->count < info->maxcount) {
1031 if (info->is_chrooted && !chroot_visible_mnt(mp, info->p))
1032 return(0);
1033 sp = &mp->mnt_stat;
1034
1035 /*
1036			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1037			 * refresh the fsstat cache unless MNT_WAIT is also
1038			 * given (MNT_WAIT overrides MNT_NOWAIT and MNT_LAZY).
1039 */
1040 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1041 (info->flags & MNT_WAIT)) &&
1042 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
1043 return(0);
1044 }
1045 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1046
1047 error = cache_fullpath(info->p, mp->mnt_ncp,
1048 &fullpath, &freepath);
1049 if (error) {
1050 info->error = error;
1051 return(-1);
1052 }
1053 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1054 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1055 free(freepath, M_TEMP);
1056
1057 error = copyout(sp, info->sfsp, sizeof(*sp));
1058 if (error) {
1059 info->error = error;
1060 return (-1);
1061 }
1062 ++info->sfsp;
1063 }
1064 info->count++;
1065 return(0);
1066}
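
/*
 * Illustrative userland sketch (not part of this source file): the usual
 * two-pass use of getfsstat(2).  With a NULL buffer the callback above skips
 * the copyout but still counts every visible mount, so the first call simply
 * sizes the array.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

static void
getfsstat_example(void)
{
	struct statfs *sfs;
	int i, n;

	if ((n = getfsstat(NULL, 0, MNT_NOWAIT)) < 0)
		err(1, "getfsstat");
	if ((sfs = malloc(n * sizeof(*sfs))) == NULL)
		err(1, "malloc");
	if ((n = getfsstat(sfs, n * sizeof(*sfs), MNT_NOWAIT)) < 0)
		err(1, "getfsstat");
	for (i = 0; i < n; ++i)
		printf("%s on %s\n", sfs[i].f_fstypename, sfs[i].f_mntonname);
	free(sfs);
}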
1067
1068/*
1069 * fchdir_args(int fd)
1070 *
1071 * Change current working directory to a given file descriptor.
1072 */
1073/* ARGSUSED */
1074int
1075sys_fchdir(struct fchdir_args *uap)
1076{
1077 struct thread *td = curthread;
1078 struct proc *p = td->td_proc;
1079 struct filedesc *fdp = p->p_fd;
1080 struct vnode *vp, *ovp;
1081 struct mount *mp;
1082 struct file *fp;
1083 struct namecache *ncp, *oncp;
1084 struct namecache *nct;
1085 int error;
1086
1087 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
1088 return (error);
1089 vp = (struct vnode *)fp->f_data;
1090 vref(vp);
1091 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1092 if (vp->v_type != VDIR || fp->f_ncp == NULL)
1093 error = ENOTDIR;
1094 else
1095 error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
1096 if (error) {
1097 vput(vp);
1098 fdrop(fp);
1099 return (error);
1100 }
1101 ncp = cache_hold(fp->f_ncp);
1102 while (!error && (mp = vp->v_mountedhere) != NULL) {
1103 error = nlookup_mp(mp, &nct);
1104 if (error == 0) {
1105 cache_unlock(nct); /* leave ref intact */
1106 vput(vp);
1107 vp = nct->nc_vp;
1108 error = vget(vp, LK_SHARED);
1109 KKASSERT(error == 0);
1110 cache_drop(ncp);
1111 ncp = nct;
1112 }
1113 }
1114 if (error == 0) {
1115 ovp = fdp->fd_cdir;
1116 oncp = fdp->fd_ncdir;
1117 VOP_UNLOCK(vp, 0); /* leave ref intact */
1118 fdp->fd_cdir = vp;
1119 fdp->fd_ncdir = ncp;
1120 cache_drop(oncp);
1121 vrele(ovp);
1122 } else {
1123 cache_drop(ncp);
1124 vput(vp);
1125 }
1126 fdrop(fp);
1127 return (error);
1128}
1129
1130int
1131kern_chdir(struct nlookupdata *nd)
1132{
1133 struct thread *td = curthread;
1134 struct proc *p = td->td_proc;
1135 struct filedesc *fdp = p->p_fd;
1136 struct vnode *vp, *ovp;
1137 struct namecache *oncp;
1138 int error;
1139
1140 if ((error = nlookup(nd)) != 0)
1141 return (error);
1142 if ((vp = nd->nl_ncp->nc_vp) == NULL)
1143 return (ENOENT);
1144 if ((error = vget(vp, LK_SHARED)) != 0)
1145 return (error);
1146
1147 error = checkvp_chdir(vp, td);
1148 VOP_UNLOCK(vp, 0);
1149 if (error == 0) {
1150 ovp = fdp->fd_cdir;
1151 oncp = fdp->fd_ncdir;
1152 cache_unlock(nd->nl_ncp); /* leave reference intact */
1153 fdp->fd_ncdir = nd->nl_ncp;
1154 fdp->fd_cdir = vp;
1155 cache_drop(oncp);
1156 vrele(ovp);
1157 nd->nl_ncp = NULL;
1158 } else {
1159 vrele(vp);
1160 }
1161 return (error);
1162}
1163
1164/*
1165 * chdir_args(char *path)
1166 *
1167 * Change current working directory (``.'').
1168 */
1169int
1170sys_chdir(struct chdir_args *uap)
1171{
1172 struct nlookupdata nd;
1173 int error;
1174
1175 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1176 if (error == 0)
1177 error = kern_chdir(&nd);
1178 nlookup_done(&nd);
1179 return (error);
1180}
1181
1182/*
1183 * Helper function for the raised chroot(2) security check: refuse the
1184 * operation if any file descriptors are open directories.
1185 */
1186static int
1187chroot_refuse_vdir_fds(struct filedesc *fdp)
1189{
1190 struct vnode *vp;
1191 struct file *fp;
1192 int error;
1193 int fd;
1194
1195 for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1196 if ((error = holdvnode(fdp, fd, &fp)) != 0)
1197 continue;
1198 vp = (struct vnode *)fp->f_data;
1199 if (vp->v_type != VDIR) {
1200 fdrop(fp);
1201 continue;
1202 }
1203 fdrop(fp);
1204 return(EPERM);
1205 }
1206 return (0);
1207}
1208
1209/*
1210 * This sysctl determines if we will allow a process to chroot(2) if it
1211 * has a directory open:
1212 * 0: disallowed for all processes.
1213 * 1: allowed for processes that were not already chroot(2)'ed.
1214 * 2: allowed for all processes.
1215 */
1216
1217static int chroot_allow_open_directories = 1;
1218
1219SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1220 &chroot_allow_open_directories, 0, "");
1221
1222/*
1223 * chroot to the specified namecache entry. We obtain the vp from the
1224 * namecache data. The passed ncp must be locked and referenced and will
1225 * remain locked and referenced on return.
1226 */
1227int
1228kern_chroot(struct namecache *ncp)
1229{
1230 struct thread *td = curthread;
1231 struct proc *p = td->td_proc;
1232 struct filedesc *fdp = p->p_fd;
1233 struct vnode *vp;
1234 int error;
1235
1236 /*
1237 * Only root can chroot
1238 */
1239 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)
1240 return (error);
1241
1242 /*
1243 * Disallow open directory descriptors (fchdir() breakouts).
1244 */
1245 if (chroot_allow_open_directories == 0 ||
1246 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1247 if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
1248 return (error);
1249 }
1250 if ((vp = ncp->nc_vp) == NULL)
1251 return (ENOENT);
1252
1253 if ((error = vget(vp, LK_SHARED)) != 0)
1254 return (error);
1255
1256 /*
1257 * Check the validity of vp as a directory to change to and
1258 * associate it with rdir/jdir.
1259 */
1260 error = checkvp_chdir(vp, td);
1261 VOP_UNLOCK(vp, 0); /* leave reference intact */
1262 if (error == 0) {
1263 vrele(fdp->fd_rdir);
1264 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
1265 cache_drop(fdp->fd_nrdir);
1266 fdp->fd_nrdir = cache_hold(ncp);
1267 if (fdp->fd_jdir == NULL) {
1268 fdp->fd_jdir = vp;
1269 vref(fdp->fd_jdir);
1270 fdp->fd_njdir = cache_hold(ncp);
1271 }
1272 } else {
1273 vrele(vp);
1274 }
1275 return (error);
1276}
1277
1278/*
1279 * chroot_args(char *path)
1280 *
1281 * Change notion of root (``/'') directory.
1282 */
1283/* ARGSUSED */
1284int
1285sys_chroot(struct chroot_args *uap)
1286{
1287 struct thread *td = curthread;
1288 struct nlookupdata nd;
1289 int error;
1290
1291 KKASSERT(td->td_proc);
1292 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1293 if (error) {
1294 nlookup_done(&nd);
1295 return(error);
1296 }
1297 error = nlookup(&nd);
1298 if (error == 0)
1299 error = kern_chroot(nd.nl_ncp);
1300 nlookup_done(&nd);
1301 return(error);
1302}
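
/*
 * Illustrative userland sketch (not part of this source file): the usual
 * chroot(2) pattern.  kern_chroot() above replaces fd_rdir/fd_nrdir but does
 * not touch the current directory, so chdir() into the new root as well to
 * avoid keeping a working directory outside of it.
 */
#include <err.h>
#include <unistd.h>

static void
chroot_example(const char *newroot)
{
	if (chdir(newroot) < 0)
		err(1, "chdir");
	if (chroot(newroot) < 0)	/* root only; see suser_cred() above */
		err(1, "chroot");
	/* The cwd is now inside the new root; drop privileges next. */
}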
1303
1304/*
1305 * Common routine for chroot and chdir. Given a locked, referenced vnode,
1306 * determine whether it is legal to chdir to the vnode. The vnode's state
1307 * is not changed by this call.
1308 */
1309int
1310checkvp_chdir(struct vnode *vp, struct thread *td)
1311{
1312 int error;
1313
1314 if (vp->v_type != VDIR)
1315 error = ENOTDIR;
1316 else
1317 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
1318 return (error);
1319}
1320
1321int
1322kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
1323{
1324 struct thread *td = curthread;
1325 struct proc *p = td->td_proc;
1326 struct lwp *lp = td->td_lwp;
1327 struct filedesc *fdp = p->p_fd;
1328 int cmode, flags;
1329 struct file *nfp;
1330 struct file *fp;
1331 struct vnode *vp;
1332 int type, indx, error;
1333 struct flock lf;
1334
1335 if ((oflags & O_ACCMODE) == O_ACCMODE)
1336 return (EINVAL);
1337 flags = FFLAGS(oflags);
1338 error = falloc(p, &nfp, NULL);
1339 if (error)
1340 return (error);
1341 fp = nfp;
1342 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1343
1344 /*
1345 * XXX p_dupfd is a real mess. It allows a device to return a
1346	 * file descriptor to be duplicated rather than doing the open
1347 * itself.
1348 */
1349 lp->lwp_dupfd = -1;
1350
1351 /*
1352 * Call vn_open() to do the lookup and assign the vnode to the
1353 * file pointer. vn_open() does not change the ref count on fp
1354 * and the vnode, on success, will be inherited by the file pointer
1355 * and unlocked.
1356 */
1357 nd->nl_flags |= NLC_LOCKVP;
1358 error = vn_open(nd, fp, flags, cmode);
1359 nlookup_done(nd);
1360 if (error) {
1361 /*
1362 * handle special fdopen() case. bleh. dupfdopen() is
1363 * responsible for dropping the old contents of ofiles[indx]
1364 * if it succeeds.
1365 *
1366 * Note that fsetfd() will add a ref to fp which represents
1367 * the fd_files[] assignment. We must still drop our
1368 * reference.
1369 */
1370 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
1371 if (fdalloc(p, 0, &indx) == 0) {
1372 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
1373 if (error == 0) {
1374 *res = indx;
1375 fdrop(fp); /* our ref */
1376 return (0);
1377 }
1378 fsetfd(p, NULL, indx);
1379 }
1380 }
1381 fdrop(fp); /* our ref */
1382 if (error == ERESTART)
1383 error = EINTR;
1384 return (error);
1385 }
1386
1387 /*
1388 * ref the vnode for ourselves so it can't be ripped out from under
1389	 * us. XXX need an ND flag to request that the vnode be returned
1390 * anyway.
1391 *
1392 * Reserve a file descriptor but do not assign it until the open
1393 * succeeds.
1394 */
1395 vp = (struct vnode *)fp->f_data;
1396 vref(vp);
1397 if ((error = fdalloc(p, 0, &indx)) != 0) {
1398 fdrop(fp);
1399 vrele(vp);
1400 return (error);
1401 }
1402
1403 /*
1404 * If no error occurs the vp will have been assigned to the file
1405 * pointer.
1406 */
1407 lp->lwp_dupfd = 0;
1408
1409 if (flags & (O_EXLOCK | O_SHLOCK)) {
1410 lf.l_whence = SEEK_SET;
1411 lf.l_start = 0;
1412 lf.l_len = 0;
1413 if (flags & O_EXLOCK)
1414 lf.l_type = F_WRLCK;
1415 else
1416 lf.l_type = F_RDLCK;
1417 if (flags & FNONBLOCK)
1418 type = 0;
1419 else
1420 type = F_WAIT;
1421
1422 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
1423 /*
1424 * lock request failed. Clean up the reserved
1425 * descriptor.
1426 */
1427 vrele(vp);
1428 fsetfd(p, NULL, indx);
1429 fdrop(fp);
1430 return (error);
1431 }
1432 fp->f_flag |= FHASLOCK;
1433 }
1434#if 0
1435 /*
1436	 * Assert that all regular file vnodes were created with an object.
1437 */
1438 KASSERT(vp->v_type != VREG || vp->v_object != NULL,
1439 ("open: regular file has no backing object after vn_open"));
1440#endif
1441
1442 vrele(vp);
1443
1444 /*
1445 * release our private reference, leaving the one associated with the
1446 * descriptor table intact.
1447 */
1448 fsetfd(p, fp, indx);
1449 fdrop(fp);
1450 *res = indx;
1451 return (0);
1452}
1453
1454/*
1455 * open_args(char *path, int flags, int mode)
1456 *
1457 * Check permissions, allocate an open file structure,
1458 * and call the device open routine if any.
1459 */
1460int
1461sys_open(struct open_args *uap)
1462{
1463 struct nlookupdata nd;
1464 int error;
1465
1466 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1467 if (error == 0) {
1468 error = kern_open(&nd, uap->flags,
1469 uap->mode, &uap->sysmsg_result);
1470 }
1471 nlookup_done(&nd);
1472 return (error);
1473}
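
/*
 * Illustrative userland sketch (not part of this source file): O_EXLOCK and
 * O_SHLOCK ask kern_open() above to take a whole-file advisory lock through
 * VOP_ADVLOCK() as part of the open itself.
 */
#include <err.h>
#include <fcntl.h>

static int
open_locked_example(const char *path)
{
	int fd;

	/* O_NONBLOCK turns a would-block lock into an immediate failure. */
	fd = open(path, O_RDWR | O_CREAT | O_EXLOCK | O_NONBLOCK, 0644);
	if (fd < 0)
		err(1, "open");
	return (fd);			/* closing fd drops the lock */
}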
1474
1475int
1476kern_mknod(struct nlookupdata *nd, int mode, int dev)
1477{
1478 struct namecache *ncp;
1479 struct thread *td = curthread;
1480 struct proc *p = td->td_proc;
1481 struct vnode *vp;
1482 struct vattr vattr;
1483 int error;
1484 int whiteout = 0;
1485
1486 KKASSERT(p);
1487
1488 switch (mode & S_IFMT) {
1489 case S_IFCHR:
1490 case S_IFBLK:
1491 error = suser(td);
1492 break;
1493 default:
1494 error = suser_cred(p->p_ucred, PRISON_ROOT);
1495 break;
1496 }
1497 if (error)
1498 return (error);
1499
1500 bwillwrite();
1501 nd->nl_flags |= NLC_CREATE;
1502 if ((error = nlookup(nd)) != 0)
1503 return (error);
1504 ncp = nd->nl_ncp;
1505 if (ncp->nc_vp)
1506 return (EEXIST);
1507
1508 VATTR_NULL(&vattr);
1509 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1510 vattr.va_rdev = dev;
1511 whiteout = 0;
1512
1513 switch (mode & S_IFMT) {
1514 case S_IFMT: /* used by badsect to flag bad sectors */
1515 vattr.va_type = VBAD;
1516 break;
1517 case S_IFCHR:
1518 vattr.va_type = VCHR;
1519 break;
1520 case S_IFBLK:
1521 vattr.va_type = VBLK;
1522 break;
1523 case S_IFWHT:
1524 whiteout = 1;
1525 break;
1526 default:
1527 error = EINVAL;
1528 break;
1529 }
1530 if (error == 0) {
1531 if (whiteout) {
1532 error = VOP_NWHITEOUT(ncp, nd->nl_cred, NAMEI_CREATE);
1533 } else {
1534 vp = NULL;
1535 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr);
1536 if (error == 0)
1537 vput(vp);
1538 }
1539 }
1540 return (error);
1541}
1542
1543/*
1544 * mknod_args(char *path, int mode, int dev)
1545 *
1546 * Create a special file.
1547 */
1548int
1549sys_mknod(struct mknod_args *uap)
1550{
1551 struct nlookupdata nd;
1552 int error;
1553
1554 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
1555 if (error == 0)
1556 error = kern_mknod(&nd, uap->mode, uap->dev);
1557 nlookup_done(&nd);
1558 return (error);
1559}
1560
1561int
1562kern_mkfifo(struct nlookupdata *nd, int mode)
1563{
1564 struct namecache *ncp;
1565 struct thread *td = curthread;
1566 struct proc *p = td->td_proc;
1567 struct vattr vattr;
1568 struct vnode *vp;
1569 int error;
1570
1571 bwillwrite();
1572
1573 nd->nl_flags |= NLC_CREATE;
1574 if ((error = nlookup(nd)) != 0)
1575 return (error);
1576 ncp = nd->nl_ncp;
1577 if (ncp->nc_vp)
1578 return (EEXIST);
1579
1580 VATTR_NULL(&vattr);
1581 vattr.va_type = VFIFO;
1582 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1583 vp = NULL;
1584 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr);
1585 if (error == 0)
1586 vput(vp);
1587 return (error);
1588}
1589
1590/*
1591 * mkfifo_args(char *path, int mode)
1592 *
1593 * Create a named pipe.
1594 */
1595int
1596sys_mkfifo(struct mkfifo_args *uap)
1597{
1598 struct nlookupdata nd;
1599 int error;
1600
1601 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
1602 if (error == 0)
1603 error = kern_mkfifo(&nd, uap->mode);
1604 nlookup_done(&nd);
1605 return (error);
1606}
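
/*
 * Illustrative userland sketch (not part of this source file): mkfifo(2) as
 * handled by kern_mkfifo() above, which creates a VFIFO node via VOP_NMKNOD().
 */
#include <sys/stat.h>
#include <err.h>

static void
mkfifo_example(void)
{
	/* The 0600 mode is masked by the process umask, as in kern_mkfifo(). */
	if (mkfifo("/tmp/example.fifo", 0600) < 0)
		err(1, "mkfifo");
}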
1607
1608static int hardlink_check_uid = 0;
1609SYSCTL_INT(_kern, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1610 &hardlink_check_uid, 0,
1611 "Unprivileged processes cannot create hard links to files owned by other "
1612 "users");
1613static int hardlink_check_gid = 0;
1614SYSCTL_INT(_kern, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1615 &hardlink_check_gid, 0,
1616 "Unprivileged processes cannot create hard links to files owned by other "
1617 "groups");
1618
1619static int
1620can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1621{
1622 struct vattr va;
1623 int error;
1624
1625 /*
1626 * Shortcut if disabled
1627 */
1628 if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
1629 return (0);
1630
1631 /*
1632 * root cred can always hardlink
1633 */
1634 if (suser_cred(cred, PRISON_ROOT) == 0)
1635 return (0);
1636
1637 /*
1638 * Otherwise only if the originating file is owned by the
1639 * same user or group. Note that any group is allowed if
1640 * the file is owned by the caller.
1641 */
1642 error = VOP_GETATTR(vp, &va);
1643 if (error != 0)
1644 return (error);
1645
1646 if (hardlink_check_uid) {
1647 if (cred->cr_uid != va.va_uid)
1648 return (EPERM);
1649 }
1650
1651 if (hardlink_check_gid) {
1652 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
1653 return (EPERM);
1654 }
1655
1656 return (0);
1657}
1658
1659int
1660kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
1661{
1662 struct thread *td = curthread;
1663 struct vnode *vp;
1664 int error;
1665
1666 /*
1667	 * Lookup the source and obtain a locked vnode.
1668 *
1669 * XXX relookup on vget failure / race ?
1670 */
1671 bwillwrite();
1672 if ((error = nlookup(nd)) != 0)
1673 return (error);
1674 vp = nd->nl_ncp->nc_vp;
1675 KKASSERT(vp != NULL);
1676 if (vp->v_type == VDIR)
1677 return (EPERM); /* POSIX */
1678 if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
1679 return (error);
1680
1681 /*
1682 * Unlock the source so we can lookup the target without deadlocking
1683 * (XXX vp is locked already, possible other deadlock?). The target
1684 * must not exist.
1685 */
1686 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
1687 nd->nl_flags &= ~NLC_NCPISLOCKED;
1688 cache_unlock(nd->nl_ncp);
1689
1690 linknd->nl_flags |= NLC_CREATE;
1691 if ((error = nlookup(linknd)) != 0) {
1692 vput(vp);
1693 return (error);
1694 }
1695 if (linknd->nl_ncp->nc_vp) {
1696 vput(vp);
1697 return (EEXIST);
1698 }
1699
1700 /*
1701 * Finally run the new API VOP.
1702 */
1703 error = can_hardlink(vp, td, td->td_proc->p_ucred);
1704 if (error == 0)
1705 error = VOP_NLINK(linknd->nl_ncp, vp, linknd->nl_cred);
1706 vput(vp);
1707 return (error);
1708}
1709
1710/*
1711 * link_args(char *path, char *link)
1712 *
1713 * Make a hard file link.
1714 */
1715int
1716sys_link(struct link_args *uap)
1717{
1718 struct nlookupdata nd, linknd;
1719 int error;
1720
1721 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1722 if (error == 0) {
1723 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
1724 if (error == 0)
1725 error = kern_link(&nd, &linknd);
1726 nlookup_done(&linknd);
1727 }
1728 nlookup_done(&nd);
1729 return (error);
1730}
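
/*
 * Illustrative userland sketch (not part of this source file): link(2) as
 * handled by kern_link() above.  Directories cannot be hard-linked (EPERM),
 * and the kern.hardlink_check_uid / kern.hardlink_check_gid sysctls defined
 * above can further restrict unprivileged callers.
 */
#include <err.h>
#include <unistd.h>

static void
link_example(void)
{
	if (link("/tmp/original", "/tmp/alias") < 0)
		err(1, "link");
	/* Both names now resolve to the same underlying vnode. */
}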
1731
1732int
1733kern_symlink(struct nlookupdata *nd, char *path, int mode)
1734{
1735 struct namecache *ncp;
1736 struct vattr vattr;
1737 struct vnode *vp;
1738 int error;
1739
1740 bwillwrite();
1741 nd->nl_flags |= NLC_CREATE;
1742 if ((error = nlookup(nd)) != 0)
1743 return (error);
1744 ncp = nd->nl_ncp;
1745 if (ncp->nc_vp)
1746 return (EEXIST);
1747
1748 VATTR_NULL(&vattr);
1749 vattr.va_mode = mode;
1750 error = VOP_NSYMLINK(ncp, &vp, nd->nl_cred, &vattr, path);
1751 if (error == 0)
1752 vput(vp);
1753 return (error);
1754}
1755
1756/*
1757 * symlink(char *path, char *link)
1758 *
1759 * Make a symbolic link.
1760 */
1761int
1762sys_symlink(struct symlink_args *uap)
1763{
1764 struct thread *td = curthread;
1765 struct nlookupdata nd;
1766 char *path;
1767 int error;
1768 int mode;
1769
1770 path = objcache_get(namei_oc, M_WAITOK);
1771 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1772 if (error == 0) {
1773 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
1774 if (error == 0) {
1775 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
1776 error = kern_symlink(&nd, path, mode);
1777 }
1778 nlookup_done(&nd);
1779 }
1780 objcache_put(namei_oc, path);
1781 return (error);
1782}
1783
1784/*
1785 * undelete_args(char *path)
1786 *
1787 * Delete a whiteout from the filesystem.
1788 */
1789/* ARGSUSED */
1790int
1791sys_undelete(struct undelete_args *uap)
1792{
1793 struct nlookupdata nd;
1794 int error;
1795
1796 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
1797 bwillwrite();
1798 nd.nl_flags |= NLC_DELETE;
1799 if (error == 0)
1800 error = nlookup(&nd);
1801 if (error == 0)
1802 error = VOP_NWHITEOUT(nd.nl_ncp, nd.nl_cred, NAMEI_DELETE);
1803 nlookup_done(&nd);
1804 return (error);
1805}
1806
1807int
1808kern_unlink(struct nlookupdata *nd)
1809{
1810 struct namecache *ncp;
1811 int error;
1812
1813 bwillwrite();
1814 nd->nl_flags |= NLC_DELETE;
1815 if ((error = nlookup(nd)) != 0)
1816 return (error);
1817 ncp = nd->nl_ncp;
1818 error = VOP_NREMOVE(ncp, nd->nl_cred);
1819 return (error);
1820}
1821
1822/*
1823 * unlink_args(char *path)
1824 *
1825 * Delete a name from the filesystem.
1826 */
1827int
1828sys_unlink(struct unlink_args *uap)
1829{
1830 struct nlookupdata nd;
1831 int error;
1832
1833 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
1834 if (error == 0)
1835 error = kern_unlink(&nd);
1836 nlookup_done(&nd);
1837 return (error);
1838}
1839
1840int
1841kern_lseek(int fd, off_t offset, int whence, off_t *res)
1842{
1843 struct thread *td = curthread;
1844 struct proc *p = td->td_proc;
1845 struct file *fp;
1846 struct vattr vattr;
1847 int error;
1848
1849 fp = holdfp(p->p_fd, fd, -1);
1850 if (fp == NULL)
1851 return (EBADF);
1852 if (fp->f_type != DTYPE_VNODE) {
1853 error = ESPIPE;
1854 goto done;
1855 }
1856
1857 switch (whence) {
1858 case L_INCR:
1859 fp->f_offset += offset;
1860 error = 0;
1861 break;
1862 case L_XTND:
1863 error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr);
1864 if (error == 0)
1865 fp->f_offset = offset + vattr.va_size;
1866 break;
1867 case L_SET:
1868 fp->f_offset = offset;
1869 error = 0;
1870 break;
1871 default:
1872 error = EINVAL;
1873 break;
1874 }
1875 *res = fp->f_offset;
1876done:
1877 fdrop(fp);
1878 return (error);
1879}
1880
1881/*
1882 * lseek_args(int fd, int pad, off_t offset, int whence)
1883 *
1884 * Reposition read/write file offset.
1885 */
1886int
1887sys_lseek(struct lseek_args *uap)
1888{
1889 int error;
1890
1891 error = kern_lseek(uap->fd, uap->offset, uap->whence,
1892 &uap->sysmsg_offset);
1893
1894 return (error);
1895}
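
/*
 * Illustrative userland sketch (not part of this source file): the userland
 * SEEK_SET/SEEK_CUR/SEEK_END constants correspond to the L_SET, L_INCR and
 * L_XTND cases handled in kern_lseek() above.
 */
#include <sys/types.h>
#include <err.h>
#include <unistd.h>

static off_t
filesize_example(int fd)
{
	off_t size;

	/* SEEK_END resolves against va_size obtained via VOP_GETATTR(). */
	if ((size = lseek(fd, 0, SEEK_END)) == -1)
		err(1, "lseek");
	return (size);
}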
1896
1897int
1898kern_access(struct nlookupdata *nd, int aflags)
1899{
1900 struct vnode *vp;
1901 int error, flags;
1902
1903 if ((error = nlookup(nd)) != 0)
1904 return (error);
1905retry:
1906 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp);
1907 if (error)
1908 return (error);
1909
1910 /* Flags == 0 means only check for existence. */
1911 if (aflags) {
1912 flags = 0;
1913 if (aflags & R_OK)
1914 flags |= VREAD;
1915 if (aflags & W_OK)
1916 flags |= VWRITE;
1917 if (aflags & X_OK)
1918 flags |= VEXEC;
1919 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1920 error = VOP_ACCESS(vp, flags, nd->nl_cred);
1921
1922 /*
1923 * If the file handle is stale we have to re-resolve the
1924 * entry. This is a hack at the moment.
1925 */
1926 if (error == ESTALE) {
1927 cache_setunresolved(nd->nl_ncp);
1928 error = cache_resolve(nd->nl_ncp, nd->nl_cred);
1929 if (error == 0) {
1930 vput(vp);
1931 vp = NULL;
1932 goto retry;
1933 }
1934 }
1935 }
1936 vput(vp);
1937 return (error);
1938}
1939
1940/*
1941 * access_args(char *path, int flags)
1942 *
1943 * Check access permissions.
1944 */
1945int
1946sys_access(struct access_args *uap)
1947{
1948 struct nlookupdata nd;
1949 int error;
1950
1951 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1952 if (error == 0)
1953 error = kern_access(&nd, uap->flags);
1954 nlookup_done(&nd);
1955 return (error);
1956}
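
/*
 * Illustrative userland sketch (not part of this source file): access(2) as
 * implemented by kern_access() above.  A flags value of 0 (F_OK) only tests
 * for existence; R_OK/W_OK/X_OK map to VREAD/VWRITE/VEXEC.
 */
#include <unistd.h>

static int
access_example(const char *path)
{
	if (access(path, R_OK | W_OK) < 0)
		return (0);		/* not accessible for read+write */
	/*
	 * Classic caveat: this is only advisory -- the file can change
	 * between this check and a later open().
	 */
	return (1);
}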
1957
1958int
1959kern_stat(struct nlookupdata *nd, struct stat *st)
1960{
1961 int error;
1962 struct vnode *vp;
1963 thread_t td;
1964
1965 if ((error = nlookup(nd)) != 0)
1966 return (error);
1967again:
1968 if ((vp = nd->nl_ncp->nc_vp) == NULL)
1969 return (ENOENT);
1970
1971 td = curthread;
1972 if ((error = vget(vp, LK_SHARED)) != 0)
1973 return (error);
1974 error = vn_stat(vp, st, nd->nl_cred);
1975
1976 /*
1977 * If the file handle is stale we have to re-resolve the entry. This
1978 * is a hack at the moment.
1979 */
1980 if (error == ESTALE) {
1981 cache_setunresolved(nd->nl_ncp);
1982 error = cache_resolve(nd->nl_ncp, nd->nl_cred);
1983 if (error == 0) {
1984 vput(vp);
1985 goto again;
1986 }
1987 }
1988 vput(vp);
1989 return (error);
1990}
1991
1992/*
1993 * stat_args(char *path, struct stat *ub)
1994 *
1995 * Get file status; this version follows links.
1996 */
1997int
1998sys_stat(struct stat_args *uap)
1999{
2000 struct nlookupdata nd;
2001 struct stat st;
2002 int error;
2003
2004 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2005 if (error == 0) {
2006 error = kern_stat(&nd, &st);
2007 if (error == 0)
2008 error = copyout(&st, uap->ub, sizeof(*uap->ub));
2009 }
2010 nlookup_done(&nd);
2011 return (error);
2012}
2013
2014/*
2015 * lstat_args(char *path, struct stat *ub)
2016 *
2017 * Get file status; this version does not follow links.
2018 */
2019int
2020sys_lstat(struct lstat_args *uap)
2021{
2022 struct nlookupdata nd;
2023 struct stat st;
2024 int error;
2025
2026 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2027 if (error == 0) {
2028 error = kern_stat(&nd, &st);
2029 if (error == 0)
2030 error = copyout(&st, uap->ub, sizeof(*uap->ub));
2031 }
2032 nlookup_done(&nd);
2033 return (error);
2034}
2035
2036/*
2037 * pathconf_args(char *path, int name)
2038 *
2039 * Get configurable pathname variables.
2040 */
2041/* ARGSUSED */
2042int
2043sys_pathconf(struct pathconf_args *uap)
2044{
2045 struct nlookupdata nd;
2046 struct vnode *vp;
2047 int error;
2048
2049 vp = NULL;
2050 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2051 if (error == 0)
2052 error = nlookup(&nd);
2053 if (error == 0)
2054 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
2055 nlookup_done(&nd);
2056 if (error == 0) {
2057 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
2058 vput(vp);
2059 }
2060 return (error);
2061}
2062
2063/*
2064 * XXX: daver
2065 * kern_readlink isn't properly split yet. There is a copyin buried
2066 * in VOP_READLINK().
2067 */
2068int
2069kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
2070{
2071 struct thread *td = curthread;
2072 struct proc *p = td->td_proc;
2073 struct vnode *vp;
2074 struct iovec aiov;
2075 struct uio auio;
2076 int error;
2077
2078 if ((error = nlookup(nd)) != 0)
2079 return (error);
2080 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp);
2081 if (error)
2082 return (error);
2083 if (vp->v_type != VLNK) {
2084 error = EINVAL;
2085 } else {
2086 aiov.iov_base = buf;
2087 aiov.iov_len = count;
2088 auio.uio_iov = &aiov;
2089 auio.uio_iovcnt = 1;
2090 auio.uio_offset = 0;
2091 auio.uio_rw = UIO_READ;
2092 auio.uio_segflg = UIO_USERSPACE;
2093 auio.uio_td = td;
2094 auio.uio_resid = count;
2095 error = VOP_READLINK(vp, &auio, p->p_ucred);
2096 }
2097 vput(vp);
2098 *res = count - auio.uio_resid;
2099 return (error);
2100}
2101
2102/*
2103 * readlink_args(char *path, char *buf, int count)
2104 *
2105 * Return target name of a symbolic link.
2106 */
2107int
2108sys_readlink(struct readlink_args *uap)
2109{
2110 struct nlookupdata nd;
2111 int error;
2112
2113 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2114 if (error == 0) {
2115 error = kern_readlink(&nd, uap->buf, uap->count,
2116 &uap->sysmsg_result);
2117 }
2118 nlookup_done(&nd);
2119 return (error);
2120}
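
/*
 * Illustrative userland sketch (not part of this source file): readlink(2)
 * as serviced by kern_readlink() above.  The returned length is
 * count - uio_resid and the buffer is not NUL-terminated, so terminate it
 * by hand.
 */
#include <err.h>
#include <stdio.h>
#include <unistd.h>

static void
readlink_example(const char *path)
{
	char target[1024];
	int n;

	if ((n = readlink(path, target, sizeof(target) - 1)) < 0)
		err(1, "readlink");
	target[n] = '\0';
	printf("%s -> %s\n", path, target);
}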
2121
2122static int
2123setfflags(struct vnode *vp, int flags)
2124{
2125 struct thread *td = curthread;
2126 struct proc *p = td->td_proc;
2127 int error;
2128 struct vattr vattr;
2129
2130 /*
2131 * Prevent non-root users from setting flags on devices. When
2132 * a device is reused, users can retain ownership of the device
2133 * if they are allowed to set flags and programs assume that
2134 * chown can't fail when done as root.
2135 */
2136 if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2137 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0))
2138 return (error);
2139
2140 /*
2141 * note: vget is required for any operation that might mod the vnode
2142 * so VINACTIVE is properly cleared.
2143 */
2144 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
2145 VATTR_NULL(&vattr);
2146 vattr.va_flags = flags;
2147 error = VOP_SETATTR(vp, &vattr, p->p_ucred);
2148 vput(vp);
2149 }
2150 return (error);
2151}
2152
2153/*
2154 * chflags(char *path, int flags)
2155 *
2156 * Change flags of a file given a path name.
2157 */
2158/* ARGSUSED */
2159int
2160sys_chflags(struct chflags_args *uap)
2161{
2162 struct nlookupdata nd;
2163 struct vnode *vp;
2164 int error;
2165
2166 vp = NULL;
2167 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2168 /* XXX Add NLC flag indicating modifying operation? */
2169 if (error == 0)
2170 error = nlookup(&nd);
2171 if (error == 0)
2172 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp);
2173 nlookup_done(&nd);
2174 if (error == 0) {
2175 error = setfflags(vp, uap->flags);
2176 vrele(vp);
2177 }
2178 return (error);
2179}
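
/*
 * Illustrative userland sketch (not part of this source file): chflags(2) as
 * handled by setfflags() above.  UF_NODUMP is one of the user-settable flags
 * from <sys/stat.h>; the SF_* system flags are traditionally root-only.
 */
#include <sys/stat.h>
#include <err.h>
#include <unistd.h>

static void
chflags_example(const char *path)
{
	if (chflags(path, UF_NODUMP) < 0)
		err(1, "chflags");
}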
2180
2181/*
2182 * fchflags_args(int fd, int flags)
2183 *
2184 * Change flags of a file given a file descriptor.
2185 */
2186/* ARGSUSED */
2187int
2188sys_fchflags(struct fchflags_args *uap)
2189{
2190 struct thread *td = curthread;
2191 struct proc *p = td->td_proc;
2192 struct file *fp;
2193 int error;
2194
2195 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
2196 return (error);
2197 error = setfflags((struct vnode *) fp->f_data, uap->flags);
2198 fdrop(fp);
2199 return (error);
2200}
2201
2202static int
2203setfmode(struct vnode *vp, int mode)
2204{
2205 struct thread *td = curthread;
2206 struct proc *p = td->td_proc;
2207 int error;
2208 struct vattr vattr;
2209
2210 /*
2211 * note: vget is required for any operation that might mod the vnode
2212 * so VINACTIVE is properly cleared.
2213 */
2214 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
2215 VATTR_NULL(&vattr);
2216 vattr.va_mode = mode & ALLPERMS;
2217 error = VOP_SETATTR(vp, &vattr, p->p_ucred);
2218 vput(vp);
2219 }
2220 return error;
2221}
2222
2223int
2224kern_chmod(struct nlookupdata *nd, int mode)
2225{
2226 struct vnode *vp;
2227 int error;
2228
2229 /* XXX Add NLC flag indicating modifying operation? */
2230 if ((error = nlookup(nd)) != 0)
2231 return (error);
2232 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0)
2233 return (error);
2234 error = setfmode(vp, mode);
2235 vrele(vp);
2236 return (error);
2237}
2238
2239/*
2240 * chmod_args(char *path, int mode)
2241 *
2242 * Change mode of a file given path name.
2243 */
2244/* ARGSUSED */
2245int
2246sys_chmod(struct chmod_args *uap)
2247{
2248 struct nlookupdata nd;
2249 int error;
2250
2251 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2252 if (error == 0)
2253 error = kern_chmod(&nd, uap->mode);
2254 nlookup_done(&nd);
2255 return (error);
2256}
2257
2258/*
2259 * lchmod_args(char *path, int mode)
2260 *
2261 * Change mode of a file given path name (don't follow links).
2262 */
2263/* ARGSUSED */
2264int
2265sys_lchmod(struct lchmod_args *uap)
2266{
2267 struct nlookupdata nd;
2268 int error;
2269
2270 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2271 if (error == 0)
2272 error = kern_chmod(&nd, uap->mode);
2273 nlookup_done(&nd);
2274 return (error);
2275}
2276
2277/*
2278 * fchmod_args(int fd, int mode)
2279 *
2280 * Change mode of a file given a file descriptor.
2281 */
2282/* ARGSUSED */
2283int
2284sys_fchmod(struct fchmod_args *uap)
2285{
2286 struct thread *td = curthread;
2287 struct proc *p = td->td_proc;
2288 struct file *fp;
2289 int error;
2290
2291 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
2292 return (error);
2293 error = setfmode((struct vnode *)fp->f_data, uap->mode);
2294 fdrop(fp);
2295 return (error);
2296}
2297
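/*
 * Common code for chown(), lchown(), and fchown().  Applies the new
 * owner and group to the vnode via VOP_SETATTR().
 */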
2298static int
2299setfown(struct vnode *vp, uid_t uid, gid_t gid)
2300{
2301 struct thread *td = curthread;
2302 struct proc *p = td->td_proc;
2303 int error;
2304 struct vattr vattr;
2305
2306 /*
2307 * note: vget is required for any operation that might mod the vnode
2308 * so VINACTIVE is properly cleared.
2309 */
2310 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
2311 VATTR_NULL(&vattr);
2312 vattr.va_uid = uid;
2313 vattr.va_gid = gid;
2314 error = VOP_SETATTR(vp, &vattr, p->p_ucred);
2315 vput(vp);
2316 }
2317 return error;
2318}
2319
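/*
 * Resolve the nlookup path and change the ownership of the resulting
 * file.
 */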
2320int
2321kern_chown(struct nlookupdata *nd, int uid, int gid)
2322{
2323 struct vnode *vp;
2324 int error;
2325
2326 /* XXX Add NLC flag indicating modifying operation? */
2327 if ((error = nlookup(nd)) != 0)
2328 return (error);
2329 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0)
2330 return (error);
2331 error = setfown(vp, uid, gid);
2332 vrele(vp);
2333 return (error);
2334}
2335
2336/*
2337 * chown(char *path, int uid, int gid)
2338 *
2339 * Set ownership given a path name.
2340 */
2341int
2342sys_chown(struct chown_args *uap)
2343{
2344 struct nlookupdata nd;
2345 int error;
2346
2347 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2348 if (error == 0)
2349 error = kern_chown(&nd, uap->uid, uap->gid);
2350 nlookup_done(&nd);
2351 return (error);
2352}
2353
2354/*
2355 * lchown_args(char *path, int uid, int gid)
2356 *
2357 * Set ownership given a path name; do not cross symlinks.
2358 */
2359int
2360sys_lchown(struct lchown_args *uap)
2361{
2362 struct nlookupdata nd;
2363 int error;
2364
2365 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2366 if (error == 0)
2367 error = kern_chown(&nd, uap->uid, uap->gid);
2368 nlookup_done(&nd);
2369 return (error);
2370}
2371
2372/*
2373 * fchown_args(int fd, int uid, int gid)
2374 *
2375 * Set ownership given a file descriptor.
2376 */
2377/* ARGSUSED */
2378int
2379sys_fchown(struct fchown_args *uap)
2380{
2381 struct thread *td = curthread;
2382 struct proc *p = td->td_proc;
2383 struct file *fp;
2384 int error;
2385
2386 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
2387 return (error);
2388 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
2389 fdrop(fp);
2390 return (error);
2391}
2392
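/*
 * Convert an optional utimes()-style timeval pair to timespecs.  A NULL
 * tvp means "use the current time" for both timestamps.
 */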
2393static int
2394getutimes(const struct timeval *tvp, struct timespec *tsp)
2395{
2396 struct timeval tv[2];
2397
2398 if (tvp == NULL) {
2399 microtime(&tv[0]);
2400 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2401 tsp[1] = tsp[0];
2402 } else {
2403 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2404 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2405 }
2406 return 0;
2407}
2408
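/*
 * Common code for utimes(), lutimes(), and futimes().  Applies the
 * access and modification times to the vnode via VOP_SETATTR().
 * nullflag is set when the caller passed a NULL timeval pointer and
 * causes VA_UTIMES_NULL to be set in the vattr.
 */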
2409static int
2410setutimes(struct vnode *vp, const struct timespec *ts, int nullflag)
2411{
2412 struct thread *td = curthread;
2413 struct proc *p = td->td_proc;
2414 int error;
2415 struct vattr vattr;
2416
2417 /*
2418 * note: vget is required for any operation that might mod the vnode
2419 * so VINACTIVE is properly cleared.
2420 */
2421 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
2422 VATTR_NULL(&vattr);
2423 vattr.va_atime = ts[0];
2424 vattr.va_mtime = ts[1];
2425 if (nullflag)
2426 vattr.va_vaflags |= VA_UTIMES_NULL;
2427 error = VOP_SETATTR(vp, &vattr, p->p_ucred);
2428 vput(vp);
2429 }
2430 return error;
2431}
2432
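/*
 * Resolve the nlookup path and set the access/modification times of the
 * resulting file.  tptr may be NULL to set both to the current time.
 */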
2433int
2434kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
2435{
2436 struct timespec ts[2];
2437 struct vnode *vp;
2438 int error;
2439
2440 if ((error = getutimes(tptr, ts)) != 0)
2441 return (error);
2442 /* XXX Add NLC flag indicating modifying operation? */
2443 if ((error = nlookup(nd)) != 0)
2444 return (error);
2445 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0)
2446 return (error);
2447 error = setutimes(vp, ts, tptr == NULL);
2448 vrele(vp);
2449 return (error);
2450}
2451
2452/*
2453 * utimes_args(char *path, struct timeval *tptr)
2454 *
2455 * Set the access and modification times of a file.
2456 */
2457int
2458sys_utimes(struct utimes_args *uap)
2459{
2460 struct timeval tv[2];
2461 struct nlookupdata nd;
2462 int error;
2463
2464 if (uap->tptr) {
2465 error = copyin(uap->tptr, tv, sizeof(tv));
2466 if (error)
2467 return (error);
2468 }
2469 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2470 if (error == 0)
2471 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
2472 nlookup_done(&nd);
2473 return (error);
2474}
2475
2476/*
2477 * lutimes_args(char *path, struct timeval *tptr)
2478 *
2479 * Set the access and modification times of a file (don't follow links).
2480 */
2481int
2482sys_lutimes(struct lutimes_args *uap)
2483{
2484 struct timeval tv[2];
2485 struct nlookupdata nd;
2486 int error;
2487
2488 if (uap->tptr) {
2489 error = copyin(uap->tptr, tv, sizeof(tv));
2490 if (error)
2491 return (error);
2492 }
2493 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2494 if (error == 0)
2495 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
2496 nlookup_done(&nd);
2497 return (error);
2498}
2499
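/*
 * futimes() backend - set the access/modification times of the file
 * referenced by the descriptor.  tptr may be NULL to set both to the
 * current time.
 */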
2500int
2501kern_futimes(int fd, struct timeval *tptr)
2502{
2503 struct thread *td = curthread;
2504 struct proc *p = td->td_proc;
2505 struct timespec ts[2];
2506 struct file *fp;
2507 int error;
2508
2509 error = getutimes(tptr, ts);
2510 if (error)
2511 return (error);
2512 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
2513 return (error);
2514 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
2515 fdrop(fp);
2516 return (error);
2517}
2518
2519/*
2520 * futimes_args(int fd, struct timeval *tptr)
2521 *
2522 * Set the access and modification times of a file.
2523 */
2524int
2525sys_futimes(struct futimes_args *uap)
2526{
2527 struct timeval tv[2];
2528 int error;
2529
2530 if (uap->tptr) {
2531 error = copyin(uap->tptr, tv, sizeof(tv));
2532 if (error)
2533 return (error);
2534 }
2535
2536 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
2537
2538 return (error);
2539}
2540
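/*
 * truncate() backend - resolve the nlookup path and set the file's
 * size.  The vnode is held exclusively locked across the write-access
 * checks and the VOP_SETATTR() call.
 */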
2541int
2542kern_truncate(struct nlookupdata *nd, off_t length)
2543{
2544 struct vnode *vp;
2545 struct vattr vattr;
2546 int error;
2547
2548 if (length < 0)
2549 return(EINVAL);
2550 /* XXX Add NLC flag indicating modifying operation? */
2551 if ((error = nlookup(nd)) != 0)
2552 return (error);
2553 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0)
2554 return (error);
2555 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
2556 vrele(vp);
2557 return (error);
2558 }
2559 if (vp->v_type == VDIR) {
2560 error = EISDIR;
2561 } else if ((error = vn_writechk(vp)) == 0 &&
2562 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) {
2563 VATTR_NULL(&vattr);
2564 vattr.va_size = length;
2565 error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
2566 }
2567 vput(vp);
2568 return (error);
2569}
2570
2571/*
2572 * truncate(char *path, int pad, off_t length)
2573 *
2574 * Truncate a file given its path name.
2575 */
2576int
2577sys_truncate(struct truncate_args *uap)
2578{
2579 struct nlookupdata nd;
2580 int error;
2581
2582 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2583 if (error == 0)
2584 error = kern_truncate(&nd, uap->length);
2585 nlookup_done(&nd);
2586 return error;
2587}
2588
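/*
 * ftruncate() backend - set the size of the file referenced by the
 * descriptor.  The descriptor must have been opened for writing.
 */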
2589int
2590kern_ftruncate(int fd, off_t length)
2591{
2592 struct thread *td = curthread;
2593 struct proc *p = td->td_proc;
2594 struct vattr vattr;
2595 struct vnode *vp;
2596 struct file *fp;
2597 int error;
2598
2599 if (length < 0)
2600 return(EINVAL);
2601 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
2602 return (error);
2603 if ((fp->f_flag & FWRITE) == 0) {
2604 error = EINVAL;
2605 goto done;
2606 }
2607 vp = (struct vnode *)fp->f_data;
2608 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2609 if (vp->v_type == VDIR) {
2610 error = EISDIR;
2611 } else if ((error = vn_writechk(vp)) == 0) {
2612 VATTR_NULL(&vattr);
2613 vattr.va_size = length;
2614 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
2615 }
2616 VOP_UNLOCK(vp, 0);
2617done:
2618 fdrop(fp);
2619 return (error);
2620}
2621
2622/*
2623 * ftruncate_args(int fd, int pad, off_t length)
2624 *
2625 * Truncate a file given a file descriptor.
2626 */
2627int
2628sys_ftruncate(struct ftruncate_args *uap)
2629{
2630 int error;
2631
2632 error = kern_ftruncate(uap->fd, uap->length);
2633
2634 return (error);
2635}
2636
2637/*
2638 * fsync(int fd)
2639 *
2640 * Sync an open file.
2641 */
2642/* ARGSUSED */
2643int
2644sys_fsync(struct fsync_args *uap)
2645{
2646 struct thread *td = curthread;
2647 struct proc *p = td->td_proc;
2648 struct vnode *vp;
2649 struct file *fp;
2650 vm_object_t obj;
2651 int error;
2652
2653 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
2654 return (error);
2655 vp = (struct vnode *)fp->f_data;
2656 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2657 if ((obj = vp->v_object) != NULL)
2658 vm_object_page_clean(obj, 0, 0, 0);
2659 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 &&
2660 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
2661 bioops.io_fsync) {
2662 error = (*bioops.io_fsync)(vp);
2663 }
2664 VOP_UNLOCK(vp, 0);
2665 fdrop(fp);
2666 return (error);
2667}
2668
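/*
 * rename() backend.  The source and target paths are looked up
 * separately; the source ncp is temporarily unlocked while the target
 * is resolved, and both parent directory linkages are re-verified after
 * relocking.  Cross-filesystem renames are rejected with EXDEV, and if
 * the source and target turn out to be hardlinks to the same file the
 * operation degenerates into an NREMOVE of the source.
 */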
2669int
2670kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
2671{
2672 struct namecache *fncpd;
2673 struct namecache *tncpd;
2674 struct namecache *ncp;
2675 struct mount *mp;
2676 int error;
2677
2678 bwillwrite();
2679 if ((error = nlookup(fromnd)) != 0)
2680 return (error);
2681 if ((fncpd = fromnd->nl_ncp->nc_parent) == NULL)
2682 return (ENOENT);
2683 cache_hold(fncpd);
2684
2685 /*
2686 * unlock the source ncp so we can lookup the target ncp without
2687 * deadlocking. The target may or may not exist so we do not check
2688 * for a target vp like kern_mkdir() and other creation functions do.
2689 *
2690 * The source and target directories are ref'd and rechecked after
2691 * everything is relocked to determine if the source or target file
2692 * has been renamed.
2693 */
2694 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
2695 fromnd->nl_flags &= ~NLC_NCPISLOCKED;
2696 cache_unlock(fromnd->nl_ncp);
2697
2698 tond->nl_flags |= NLC_CREATE;
2699 if ((error = nlookup(tond)) != 0) {
2700 cache_drop(fncpd);
2701 return (error);
2702 }
2703 if ((tncpd = tond->nl_ncp->nc_parent) == NULL) {
2704 cache_drop(fncpd);
2705 return (ENOENT);
2706 }
2707 cache_hold(tncpd);
2708
2709 /*
2710 * If the source and target are the same there is nothing to do
2711 */
2712 if (fromnd->nl_ncp == tond->nl_ncp) {
2713 cache_drop(fncpd);
2714 cache_drop(tncpd);
2715 return (0);
2716 }
2717
2718 /*
2719	 * Relock the source ncp.  NOTE AFTER RELOCKING: the source ncp
2720	 * may have become invalid while it was unlocked; nc_vp and nc_mount
2721	 * could be NULL.
2722 */
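	/*
	 * Try a non-blocking lock first.  If that fails, fall back to
	 * taking the locks in a consistent (ascending ncp address)
	 * order so that two renames running in opposite directions
	 * cannot deadlock against each other.
	 */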
2723 if (cache_lock_nonblock(fromnd->nl_ncp) == 0) {
2724 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
2725 } else if (fromnd->nl_ncp > tond->nl_ncp) {
2726 cache_lock(fromnd->nl_ncp);
2727 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
2728 } else {
2729 cache_unlock(tond->nl_ncp);
2730 cache_lock(fromnd->nl_ncp);
2731 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
2732 cache_lock(tond->nl_ncp);
2733 cache_resolve(tond->nl_ncp, tond->nl_cred);
2734 }
2735 fromnd->nl_flags |= NLC_NCPISLOCKED;
2736
2737 /*
2738 * make sure the parent directories linkages are the same
2739 */
2740 if (fncpd != fromnd->nl_ncp->nc_parent ||
2741 tncpd != tond->nl_ncp->nc_parent) {
2742 cache_drop(fncpd);
2743 cache_drop(tncpd);
2744 return (ENOENT);
2745 }
2746
2747 /*
2748 * Both the source and target must be within the same filesystem and
2749 * in the same filesystem as their parent directories within the
2750 * namecache topology.
2751 *
2752 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
2753 */
2754 mp = fncpd->nc_mount;
2755 if (mp != tncpd->nc_mount || mp != fromnd->nl_ncp->nc_mount ||
2756 mp != tond->nl_ncp->nc_mount) {
2757 cache_drop(fncpd);
2758 cache_drop(tncpd);
2759 return (EXDEV);
2760 }
2761
2762 /*
2763 * If the target exists and either the source or target is a directory,
2764 * then both must be directories.
2765 *
2766 * Due to relocking of the source, fromnd->nl_ncp->nc_vp might have
2767 * become NULL.
2768 */
2769 if (tond->nl_ncp->nc_vp) {
2770 if (fromnd->nl_ncp->nc_vp == NULL) {
2771 error = ENOENT;
2772 } else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) {
2773 if (tond->nl_ncp->nc_vp->v_type != VDIR)
2774 error = ENOTDIR;
2775 } else if (tond->nl_ncp->nc_vp->v_type == VDIR) {
2776 error = EISDIR;
2777 }
2778 }
2779
2780 /*
2781 * You cannot rename a source into itself or a subdirectory of itself.
2782	 * We check this by traversing the target directory upwards looking
2783 * for a match against the source.
2784 */
2785 if (error == 0) {
2786 for (ncp = tncpd; ncp; ncp = ncp->nc_parent) {
2787 if (fromnd->nl_ncp == ncp) {
2788 error = EINVAL;
2789 break;
2790 }
2791 }
2792 }
2793
2794 cache_drop(fncpd);
2795 cache_drop(tncpd);
2796
2797 /*
2798 * Even though the namespaces are different, they may still represent
2799	 * hardlinks to the same file.  The filesystem might have a hard time
2800	 * with this, so we issue an NREMOVE of the source instead of an
2801	 * NRENAME when we detect the situation.
2802 */
2803 if (error == 0) {
2804 if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) {
2805 error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred);
2806 } else {
2807 error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp,
2808 tond->nl_cred);
2809 }
2810 }
2811 return (error);
2812}
2813
2814/*
2815 * rename_args(char *from, char *to)
2816 *
2817 * Rename files. Source and destination must either both be directories,
2818 * or both not be directories. If target is a directory, it must be empty.
2819 */
2820int
2821sys_rename(struct rename_args *uap)
2822{
2823 struct nlookupdata fromnd, tond;
2824 int error;
2825
2826 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
2827 if (error == 0) {
2828 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
2829 if (error == 0)
2830 error = kern_rename(&fromnd, &tond);
2831 nlookup_done(&tond);
2832 }
2833 nlookup_done(&fromnd);
2834 return (error);
2835}
2836
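/*
 * mkdir() backend - create a directory via VOP_NMKDIR() after verifying
 * that the target name does not already exist in the namecache.
 */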
2837int
2838kern_mkdir(struct nlookupdata *nd, int mode)
2839{
2840 struct thread *td = curthread;
2841 struct proc *p = td->td_proc;
2842 struct namecache *ncp;
2843 struct vnode *vp;
2844 struct vattr vattr;
2845 int error;
2846
2847 bwillwrite();
2848 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE;
2849 if ((error = nlookup(nd)) != 0)
2850 return (error);
2851
2852 ncp = nd->nl_ncp;
2853 if (ncp->nc_vp)
2854 return (EEXIST);
2855
2856 VATTR_NULL(&vattr);
2857 vattr.va_type = VDIR;
2858 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2859
2860 vp = NULL;
2861 error = VOP_NMKDIR(ncp, &vp, p->p_ucred, &vattr);
2862 if (error == 0)
2863 vput(vp);
2864 return (error);
2865}
2866
2867/*
2868 * mkdir_args(char *path, int mode)
2869 *
2870 * Make a directory file.
2871 */
2872/* ARGSUSED */
2873int
2874sys_mkdir(struct mkdir_args *uap)
2875{
2876 struct nlookupdata nd;
2877 int error;
2878
2879 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2880 if (error == 0)
2881 error = kern_mkdir(&nd, uap->mode);
2882 nlookup_done(&nd);
2883 return (error);
2884}
2885
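/*
 * rmdir() backend - resolve the path with NLC_DELETE and remove the
 * directory via VOP_NRMDIR().
 */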
2886int
2887kern_rmdir(struct nlookupdata *nd)
2888{
2889 struct namecache *ncp;
2890 int error;
2891
2892 bwillwrite();
2893 nd->nl_flags |= NLC_DELETE;
2894 if ((error = nlookup(nd)) != 0)
2895 return (error);
2896
2897 ncp = nd->nl_ncp;
2898 error = VOP_NRMDIR(ncp, nd->nl_cred);
2899 return (error);
2900}
2901
2902/*
2903 * rmdir_args(char *path)
2904 *
2905 * Remove a directory file.
2906 */
2907/* ARGSUSED */
2908int
2909sys_rmdir(struct rmdir_args *uap)
2910{
2911 struct nlookupdata nd;
2912 int error;
2913
2914 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2915 if (error == 0)
2916 error = kern_rmdir(&nd);
2917 nlookup_done(&nd);
2918 return (error);
2919}
2920
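/*
 * Common code for getdirentries() and getdents().  Reads directory
 * entries from the descriptor into the caller's buffer, handling union
 * mount fall-through, and returns the directory offset prior to the
 * read in *basep (if non-NULL) and the number of bytes read in *res.
 */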
2921int
2922kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
2923 enum uio_seg direction)
2924{
2925 struct thread *td = curthread;
2926 struct proc *p = td->td_proc;
2927 struct vnode *vp;
2928 struct file *fp;
2929 struct uio auio;
2930 struct iovec aiov;
2931 long loff;
2932 int error, eofflag;
2933
2934 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
2935 return (error);
2936 if ((fp->f_flag & FREAD) == 0) {
2937 error = EBADF;
2938 goto done;
2939 }
2940 vp = (struct vnode *)fp->f_data;
2941unionread:
2942 if (vp->v_type != VDIR) {
2943 error = EINVAL;
2944 goto done;
2945 }
2946 aiov.iov_base = buf;
2947 aiov.iov_len = count;
2948 auio.uio_iov = &aiov;
2949 auio.uio_iovcnt = 1;
2950 auio.uio_rw = UIO_READ;
2951 auio.uio_segflg = direction;
2952 auio.uio_td = td;
2953 auio.uio_resid = count;
2954 /* vn_lock(vp, LK_SHARED | LK_RETRY); */
2955 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2956 loff = auio.uio_offset = fp->f_offset;
2957 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
2958 fp->f_offset = auio.uio_offset;
2959 VOP_UNLOCK(vp, 0);
2960 if (error)
2961 goto done;
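	/*
	 * If nothing was read we may be at the end of a union stack
	 * layer; give the union code a chance to switch vnodes, or fall
	 * through to the covered vnode of a union mount point, and
	 * retry the read.
	 */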
2962 if (count == auio.uio_resid) {
2963 if (union_dircheckp) {
2964 error = union_dircheckp(td, &vp, fp);
2965 if (error == -1)
2966 goto unionread;
2967 if (error)
2968 goto done;
2969 }
2970 if ((vp->v_flag & VROOT) &&
2971 (vp->v_mount->mnt_flag & MNT_UNION)) {
2972 struct vnode *tvp = vp;
2973 vp = vp->v_mount->mnt_vnodecovered;
2974 vref(vp);
2975 fp->f_data = vp;
2976 fp->f_offset = 0;
2977 vrele(tvp);
2978 goto unionread;
2979 }
2980 }
2981 if (basep) {
2982 *basep = loff;
2983 }
2984 *res = count - auio.uio_resid;
2985done:
2986 fdrop(fp);
2987 return (error);
2988}
2989
2990/*
2991 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
2992 *
2993 * Read a block of directory entries in a file system independent format.
2994 */
2995int
2996sys_getdirentries(struct getdirentries_args *uap)
2997{
2998 long base;
2999 int error;
3000
3001 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
3002 &uap->sysmsg_result, UIO_USERSPACE);
3003
3004 if (error == 0)
3005 error = copyout(&base, uap->basep, sizeof(*uap->basep));
3006 return (error);
3007}
3008
3009/*
3010 * getdents_args(int fd, char *buf, size_t count)
3011 */
3012int
3013sys_getdents(struct getdents_args *uap)
3014{
3015 int error;
3016
3017 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
3018 &uap->sysmsg_result, UIO_USERSPACE);
3019
3020 return (error);
3021}
3022
3023/*
3024 * umask(int newmask)
3025 *
3026 * Set the mode mask for creation of filesystem nodes.
3027 *
3028 * MP SAFE
3029 */
3030int
3031sys_umask(struct umask_args *uap)
3032{
3033 struct thread *td = curthread;
3034 struct proc *p = td->td_proc;
3035 struct filedesc *fdp;
3036
3037 fdp = p->p_fd;
3038 uap->sysmsg_result = fdp->fd_cmask;
3039 fdp->fd_cmask = uap->newmask & ALLPERMS;
3040 return (0);
3041}
3042
3043/*
3044 * revoke(char *path)
3045 *
3046 * Void all references to the file by ripping the underlying filesystem
3047 * away from the vnode.
3048 */
3049/* ARGSUSED */
3050int
3051sys_revoke(struct revoke_args *uap)
3052{
3053 struct nlookupdata nd;
3054 struct vattr vattr;
3055 struct vnode *vp;
3056 struct ucred *cred;
3057 int error;
3058
3059 vp = NULL;
3060 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3061 if (error == 0)
3062 error = nlookup(&nd);
3063 if (error == 0)
3064 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp);
3065 cred = crhold(nd.nl_cred);
3066 nlookup_done(&nd);
3067 if (error == 0) {
3068 if (vp->v_type != VCHR && vp->v_type != VBLK)
3069 error = EINVAL;
3070 if (error == 0)
3071 error = VOP_GETATTR(vp, &vattr);
3072 if (error == 0 && cred->cr_uid != vattr.va_uid)
3073 error = suser_cred(cred, PRISON_ROOT);
3074 if (error == 0 && count_udev(vp->v_udev) > 0) {
3075 error = 0;
3076 vx_lock(vp);
3077 VOP_REVOKE(vp, REVOKEALL);
3078 vx_unlock(vp);
3079 }
3080 vrele(vp);
3081 }
3082 if (cred)
3083 crfree(cred);
3084 return (error);
3085}
3086
3087/*
3088 * getfh_args(char *fname, fhandle_t *fhp)
3089 *
3090 * Get (NFS) file handle
3091 */
3092int
3093sys_getfh(struct getfh_args *uap)
3094{
3095 struct thread *td = curthread;
3096 struct nlookupdata nd;
3097 fhandle_t fh;
3098 struct vnode *vp;
3099 int error;
3100
3101 /*
3102 * Must be super user
3103 */
3104 if ((error = suser(td)) != 0)
3105 return (error);
3106
3107 vp = NULL;
3108 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
3109 if (error == 0)
3110 error = nlookup(&nd);
3111 if (error == 0)
3112 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
3113 nlookup_done(&nd);
3114 if (error == 0) {
3115 bzero(&fh, sizeof(fh));
3116 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3117 error = VFS_VPTOFH(vp, &fh.fh_fid);
3118 vput(vp);
3119 if (error == 0)
3120 error = copyout(&fh, uap->fhp, sizeof(fh));
3121 }
3122 return (error);
3123}
3124
3125/*
3126 * fhopen_args(const struct fhandle *u_fhp, int flags)
3127 *
3128 * Syscall for the rpc.lockd to use to translate an NFS file handle into
3129 * an open descriptor.
3130 *
3131 * warning: do not remove the suser() call or this becomes one giant
3132 * security hole.
3133 */
3134int
3135sys_fhopen(struct fhopen_args *uap)
3136{
3137 struct thread *td = curthread;
3138 struct proc *p = td->td_proc;
3139 struct mount *mp;
3140 struct vnode *vp;
3141 struct fhandle fhp;
3142 struct vattr vat;
3143 struct vattr *vap = &vat;
3144 struct flock lf;
3145 int fmode, mode, error, type;
3146 struct file *nfp;
3147 struct file *fp;
3148 int indx;
3149
3150 /*
3151 * Must be super user
3152 */
3153 error = suser(td);
3154 if (error)
3155 return (error);
3156
3157 fmode = FFLAGS(uap->flags);
3158 /* why not allow a non-read/write open for our lockd? */
3159 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3160 return (EINVAL);
3161 error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3162 if (error)
3163 return(error);
3164 /* find the mount point */
3165 mp = vfs_getvfs(&fhp.fh_fsid);
3166 if (mp == NULL)
3167 return (ESTALE);
3168 /* now give me my vnode, it gets returned to me locked */
3169 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3170 if (error)
3171 return (error);
3172 /*
3173	 * From now on we have to make sure not to forget about
3174	 * the vnode.
3175	 * Any error that causes an abort must vput(vp); just set
3176	 * error = err and 'goto bad;'.
3177 */
3178
3179 /*
3180 * from vn_open
3181 */
3182 if (vp->v_type == VLNK) {
3183 error = EMLINK;
3184 goto bad;
3185 }
3186 if (vp->v_type == VSOCK) {
3187 error = EOPNOTSUPP;
3188 goto bad;
3189 }
3190 mode = 0;
3191 if (fmode & (FWRITE | O_TRUNC)) {
3192 if (vp->v_type == VDIR) {
3193 error = EISDIR;
3194 goto bad;
3195 }
3196 error = vn_writechk(vp);
3197 if (error)
3198 goto bad;
3199 mode |= VWRITE;
3200 }
3201 if (fmode & FREAD)
3202 mode |= VREAD;
3203 if (mode) {
3204 error = VOP_ACCESS(vp, mode, p->p_ucred);
3205 if (error)
3206 goto bad;
3207 }
3208 if (fmode & O_TRUNC) {
3209 VOP_UNLOCK(vp, 0); /* XXX */
3210 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
3211 VATTR_NULL(vap);
3212 vap->va_size = 0;
3213 error = VOP_SETATTR(vp, vap, p->p_ucred);
3214 if (error)
3215 goto bad;
3216 }
3217
3218 /*
3219 * VOP_OPEN needs the file pointer so it can potentially override
3220 * it.
3221 *
3222 * WARNING! no f_ncp will be associated when fhopen()ing a directory.
3223 * XXX
3224 */
3225 if ((error = falloc(p, &nfp, &indx)) != 0)
3226 goto bad;
3227 fp = nfp;
3228
3229 error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
3230 if (error) {
3231 /*
3232 * setting f_ops this way prevents VOP_CLOSE from being
3233 * called or fdrop() releasing the vp from v_data. Since
3234 * the VOP_OPEN failed we don't want to VOP_CLOSE.
3235 */
3236 fp->f_ops = &badfileops;
3237 fp->f_data = NULL;
3238 goto bad_drop;
3239 }
3240
3241 /*
3242	 * The fp is given its own reference; we still have our ref and lock.
3243 *
3244 * Assert that all regular files must be created with a VM object.
3245 */
3246 if (vp->v_type == VREG && vp->v_object == NULL) {
3247 printf("fhopen: regular file did not have VM object: %p\n", vp);
3248 goto bad_drop;
3249 }
3250
3251 /*
3252 * The open was successful. Handle any locking requirements.
3253 */
3254 if (fmode & (O_EXLOCK | O_SHLOCK)) {
3255 lf.l_whence = SEEK_SET;
3256 lf.l_start = 0;
3257 lf.l_len = 0;
3258 if (fmode & O_EXLOCK)
3259 lf.l_type = F_WRLCK;
3260 else
3261 lf.l_type = F_RDLCK;
3262 if (fmode & FNONBLOCK)
3263 type = 0;
3264 else
3265 type = F_WAIT;
3266 VOP_UNLOCK(vp, 0);
3267 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3268 /*
3269 * release our private reference.
3270 */
3271 fsetfd(p, NULL, indx);
3272 fdrop(fp);
3273 vrele(vp);
3274 return (error);
3275 }
3276 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3277 fp->f_flag |= FHASLOCK;
3278 }
3279
3280 /*
3281 * Clean up. Associate the file pointer with the previously
3282 * reserved descriptor and return it.
3283 */
3284 vput(vp);
3285 fsetfd(p, fp, indx);
3286 fdrop(fp);
3287 uap->sysmsg_result = indx;
3288 return (0);
3289
3290bad_drop:
3291 fsetfd(p, NULL, indx);
3292 fdrop(fp);
3293bad:
3294 vput(vp);
3295 return (error);
3296}
3297
3298/*
3299 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
3300 */
3301int
3302sys_fhstat(struct fhstat_args *uap)
3303{
3304 struct thread *td = curthread;
3305 struct stat sb;
3306 fhandle_t fh;
3307 struct mount *mp;
3308 struct vnode *vp;
3309 int error;
3310
3311 /*
3312 * Must be super user
3313 */
3314 error = suser(td);
3315 if (error)
3316 return (error);
3317
3318 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3319 if (error)
3320 return (error);
3321
3322 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3323 return (ESTALE);
3324 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3325 return (error);
3326 error = vn_stat(vp, &sb, td->td_proc->p_ucred);
3327 vput(vp);
3328 if (error)
3329 return (error);
3330 error = copyout(&sb, uap->sb, sizeof(sb));
3331 return (error);
3332}
3333
3334/*
3335 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
3336 */
3337int
3338sys_fhstatfs(struct fhstatfs_args *uap)
3339{
3340 struct thread *td = curthread;
3341 struct proc *p = td->td_proc;
3342 struct statfs *sp;
3343 struct mount *mp;
3344 struct vnode *vp;
3345 struct statfs sb;
3346 char *fullpath, *freepath;
3347 fhandle_t fh;
3348 int error;
3349
3350 /*
3351 * Must be super user
3352 */
3353 if ((error = suser(td)))
3354 return (error);
3355
3356 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3357 return (error);
3358
3359 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3360 return (ESTALE);
3361
3362 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0 &&
3363 !chroot_visible_mnt(mp, p))
3364 return (ESTALE);
3365
3366 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3367 return (error);
3368 mp = vp->v_mount;
3369 sp = &mp->mnt_stat;
3370 vput(vp);
3371 if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
3372 return (error);
3373
3374 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath);
3375 if (error)
3376 return(error);
3377 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3378 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
3379 free(freepath, M_TEMP);
3380
3381 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3382 if (suser(td)) {
3383 bcopy(sp, &sb, sizeof(sb));
3384 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3385 sp = &sb;
3386 }
3387 return (copyout(sp, uap->buf, sizeof(*sp)));
3388}
3389
3390/*
3391 * Syscall to push extended attribute configuration information into the
3392 * VFS. Accepts a path, which it converts to a mountpoint, as well as
3393 * a command (int cmd), and attribute name and misc data. For now, the
3394 * attribute name is left in userspace for consumption by the VFS_op.
3395 * It will probably be changed so that the syscall copies the name into
3396 * kernel space in the future, once the various consumers of the
3397 * attribute code have weighed in with their issues.
3398 *
3399 * Currently this is used only by UFS Extended Attributes.
3400 */
3401int
3402sys_extattrctl(struct extattrctl_args *uap)
3403{
3404 struct nlookupdata nd;
3405 struct mount *mp;
3406 struct vnode *vp;
3407 int error;
3408
3409 vp = NULL;
3410 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3411 if (error == 0)
3412 error = nlookup(&nd);
3413 if (error == 0) {
3414 mp = nd.nl_ncp->nc_mount;
3415 error = VFS_EXTATTRCTL(mp, uap->cmd,
3416 uap->attrname, uap->arg,
3417 nd.nl_cred);
3418 }
3419 nlookup_done(&nd);
3420 return (error);
3421}
3422
3423/*
3424 * Syscall to set a named extended attribute on a file or directory.
3425 * Accepts attribute name, and a uio structure pointing to the data to set.
3426 * The uio is consumed in the style of writev(). The real work happens
3427 * in VOP_SETEXTATTR().
3428 */
3429int
3430sys_extattr_set_file(struct extattr_set_file_args *uap)
3431{
3432 char attrname[EXTATTR_MAXNAMELEN];
3433 struct iovec aiov[UIO_SMALLIOV];
3434 struct iovec *needfree;
3435 struct nlookupdata nd;
3436 struct iovec *iov;
3437 struct vnode *vp;
3438 struct uio auio;
3439 u_int iovlen;
3440 u_int cnt;
3441 int error;
3442 int i;
3443
3444 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
3445 if (error)
3446 return (error);
3447
3448 vp = NULL;
3449 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3450 if (error == 0)
3451 error = nlookup(&nd);
3452 if (error == 0)
3453 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
3454 if (error) {
3455 nlookup_done(&nd);
3456 return (error);
3457 }
3458
3459 needfree = NULL;
3460 iovlen = uap->iovcnt * sizeof(struct iovec);
3461 if (uap->iovcnt > UIO_SMALLIOV) {
3462 if (uap->iovcnt > UIO_MAXIOV) {
3463 error = EINVAL;
3464 goto done;
3465 }
3466 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3467 needfree = iov;
3468 } else {
3469 iov = aiov;
3470 }
3471 auio.uio_iov = iov;
3472 auio.uio_iovcnt = uap->iovcnt;
3473 auio.uio_rw = UIO_WRITE;
3474 auio.uio_segflg = UIO_USERSPACE;
3475 auio.uio_td = nd.nl_td;
3476 auio.uio_offset = 0;
3477 if ((error = copyin(uap->iovp, iov, iovlen)))
3478 goto done;
3479 auio.uio_resid = 0;
3480 for (i = 0; i < uap->iovcnt; i++) {
3481 if (iov->iov_len > INT_MAX - auio.uio_resid) {
3482 error = EINVAL;
3483 goto done;
3484 }
3485 auio.uio_resid += iov->iov_len;
3486 iov++;
3487 }
3488 cnt = auio.uio_resid;
3489 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
3490 cnt -= auio.uio_resid;
3491 uap->sysmsg_result = cnt;
3492done:
3493 vput(vp);
3494 nlookup_done(&nd);
3495 if (needfree)
3496 FREE(needfree, M_IOV);
3497 return (error);
3498}
3499
3500/*
3501 * Syscall to get a named extended attribute on a file or directory.
3502 * Accepts attribute name, and a uio structure pointing to a buffer for the
3503 * data. The uio is consumed in the style of readv(). The real work
3504 * happens in VOP_GETEXTATTR().
3505 */
3506int
3507sys_extattr_get_file(struct extattr_get_file_args *uap)
3508{
3509 char attrname[EXTATTR_MAXNAMELEN];
3510 struct iovec aiov[UIO_SMALLIOV];
3511 struct iovec *needfree;
3512 struct nlookupdata nd;
3513 struct iovec *iov;
3514 struct vnode *vp;
3515 struct uio auio;
3516 u_int iovlen;
3517 u_int cnt;
3518 int error;
3519 int i;
3520
3521 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
3522 if (error)
3523 return (error);
3524
3525 vp = NULL;
3526 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3527 if (error == 0)
3528 error = nlookup(&nd);
3529 if (error == 0)
3530 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
3531 if (error) {
3532 nlookup_done(&nd);
3533 return (error);
3534 }
3535
3536 iovlen = uap->iovcnt * sizeof (struct iovec);
3537 needfree = NULL;
3538 if (uap->iovcnt > UIO_SMALLIOV) {
3539 if (uap->iovcnt > UIO_MAXIOV) {
3540 error = EINVAL;
3541 goto done;
3542 }
3543 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3544 needfree = iov;
3545 } else {
3546 iov = aiov;
3547 }
3548 auio.uio_iov = iov;
3549 auio.uio_iovcnt = uap->iovcnt;
3550 auio.uio_rw = UIO_READ;
3551 auio.uio_segflg = UIO_USERSPACE;
3552 auio.uio_td = nd.nl_td;
3553 auio.uio_offset = 0;
3554 if ((error = copyin(uap->iovp, iov, iovlen)))
3555 goto done;
3556 auio.uio_resid = 0;
3557 for (i = 0; i < uap->iovcnt; i++) {
3558 if (iov->iov_len > INT_MAX - auio.uio_resid) {
3559 error = EINVAL;
3560 goto done;
3561 }
3562 auio.uio_resid += iov->iov_len;
3563 iov++;
3564 }
3565 cnt = auio.uio_resid;
3566 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
3567 cnt -= auio.uio_resid;
3568 uap->sysmsg_result = cnt;
3569done:
3570 vput(vp);
3571 nlookup_done(&nd);
3572 if (needfree)
3573 FREE(needfree, M_IOV);
3574 return(error);
3575}
3576
3577/*
3578 * Syscall to delete a named extended attribute from a file or directory.
3579 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
3580 */
3581int
3582sys_extattr_delete_file(struct extattr_delete_file_args *uap)
3583{
3584 char attrname[EXTATTR_MAXNAMELEN];
3585 struct nlookupdata nd;
3586 struct vnode *vp;
3587 int error;
3588
3589 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
3590 if (error)
3591 return(error);
3592
3593 vp = NULL;
3594 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3595 if (error == 0)
3596 error = nlookup(&nd);
3597 if (error == 0)
3598 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
3599 if (error) {
3600 nlookup_done(&nd);
3601 return (error);
3602 }
3603
3604 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
3605 vput(vp);
3606 nlookup_done(&nd);
3607 return(error);
3608}
3609
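/*
 * Determine whether the given mount point should be visible to a
 * (possibly chroot'd) process: either the filesystem lies below the
 * process's chroot directory, or the chroot directory itself resides
 * on that filesystem.
 */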
3610static int
3611chroot_visible_mnt(struct mount *mp, struct proc *p)
3612{
3613 struct namecache *ncp;
3614 /*
3615 * First check if this file system is below
3616 * the chroot path.
3617 */
3618 ncp = mp->mnt_ncp;
3619 while (ncp != NULL && ncp != p->p_fd->fd_nrdir)
3620 ncp = ncp->nc_parent;
3621 if (ncp == NULL) {
3622 /*
3623 * This is not below the chroot path.
3624 *
3625 * Check if the chroot path is on the same filesystem,
3626		 * by determining if we have to cross a mount point
3627 * before reaching mp->mnt_ncp.
3628 */
3629 ncp = p->p_fd->fd_nrdir;
3630 while (ncp != NULL && ncp != mp->mnt_ncp) {
3631 if (ncp->nc_flag & NCF_MOUNTPT) {
3632 ncp = NULL;
3633 break;
3634 }
3635 ncp = ncp->nc_parent;
3636 }
3637 }
3638 return(ncp != NULL);
3639}