kernel - Add trigger_syncer(), VFS_MODIFYING()
[dragonfly.git] / sys / kern / vfs_syscalls.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
dc71b7ab 18 * 3. Neither the name of the University nor the names of its contributors
984263bc
MD
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
36 */
37
984263bc
MD
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/buf.h>
16df2c74 41#include <sys/conf.h>
984263bc
MD
42#include <sys/sysent.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
432b8263 45#include <sys/mountctl.h>
984263bc 46#include <sys/sysproto.h>
984263bc
MD
47#include <sys/filedesc.h>
48#include <sys/kernel.h>
49#include <sys/fcntl.h>
50#include <sys/file.h>
51#include <sys/linker.h>
52#include <sys/stat.h>
53#include <sys/unistd.h>
54#include <sys/vnode.h>
55#include <sys/proc.h>
895c1f85 56#include <sys/priv.h>
9658f145 57#include <sys/jail.h>
dadab5e9 58#include <sys/namei.h>
21739618 59#include <sys/nlookup.h>
984263bc
MD
60#include <sys/dirent.h>
61#include <sys/extattr.h>
8fa76237 62#include <sys/spinlock.h>
8f6f8622 63#include <sys/kern_syscall.h>
70aac194 64#include <sys/objcache.h>
984263bc 65#include <sys/sysctl.h>
408357d8
MD
66
67#include <sys/buf2.h>
28623bf9
MD
68#include <sys/file2.h>
69#include <sys/spinlock2.h>
70
984263bc
MD
71#include <vm/vm.h>
72#include <vm/vm_object.h>
984263bc
MD
73#include <vm/vm_page.h>
74
28623bf9
MD
75#include <machine/limits.h>
76#include <machine/stdarg.h>
77
c6b20585
SW
/*
 * Prototypes for helpers that are private to this file.
 */
78static void mount_warning(struct mount *mp, const char *ctl, ...)
79 __printflike(2, 3);
d3c546e6 80static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
e24b948e 81static int checkvp_chdir (struct vnode *vn, struct thread *td);
77480a97 82static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
35949930 83static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
75ffff0d 84static int chroot_visible_mnt(struct mount *mp, struct proc *p);
981e3cc8 85static int getutimes (struct timeval *, struct timespec *);
f34e9020 86static int getutimens (const struct timespec *, struct timespec *, int *);
82318d45 87static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
402ed7e1
RG
88static int setfmode (struct vnode *, int);
89static int setfflags (struct vnode *, int);
3a907475
MD
90static int setutimes (struct vnode *, struct vattr *,
91 const struct timespec *, int);
984263bc
MD
/*
 * When zero, sys_mount() and sys_unmount() require PRIV_ROOT before doing
 * anything else.
 */
92static int usermount = 0; /* if 1, non-root can mount fs. */
93
093e85dc
SG
/* Export usermount as a read-write integer sysctl under the _vfs node. */
94SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
95 "Allow non-root users to mount filesystems");
984263bc
MD
96
97/*
98 * Virtual File System System Calls
99 */
100
101/*
 * Mount a file system, or update an existing mount when MNT_UPDATE is set.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Permission checks: jailed callers get EPERM; PRIV_ROOT is required
 * unless the usermount sysctl is non-zero; MNT_EXPORTED always requires
 * PRIV_ROOT; callers without PRIV_ROOT have MNT_NOSUID | MNT_NODEV forced
 * into uap->flags.
 *
 * Update path (MNT_UPDATE): the looked-up vnode must be flagged VROOT or
 * VPFSROOT.  The current mnt_flag / mnt_kern_flag are saved and restored
 * if VFS_MOUNT() fails.
 *
 * New mount path: the target must be a directory owned by the caller
 * (or the caller must have PRIV_ROOT).  If the filesystem type is not
 * registered, a kernel module of that name is loaded (PRIV_ROOT only).
 * A struct mount is allocated, VFS_MOUNT() is called, and on success the
 * mount is inserted on the mountlist and VFS_START() is called.
 *
 * Returns 0 on success or an errno value.
 *
 * MPALMOSTSAFE
 */
984263bc 108int
753fd850 109sys_mount(struct mount_args *uap)
984263bc 110{
dadab5e9 111 struct thread *td = curthread;
984263bc 112 struct vnode *vp;
28623bf9 113 struct nchandle nch;
8b02b69a 114 struct mount *mp, *nullmp;
984263bc
MD
115 struct vfsconf *vfsp;
116 int error, flag = 0, flag2 = 0;
1d505369 117 int hasmount;
984263bc 118 struct vattr va;
21739618 119 struct nlookupdata nd;
984263bc 120 char fstypename[MFSNAMELEN];
3919ced0 121 struct ucred *cred;
984263bc 122
9910d07b 123 cred = td->td_ucred;
3919ced0
MD
124 if (jailed(cred)) {
125 error = EPERM;
126 goto done;
127 }
895c1f85 128 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
3919ced0
MD
129 goto done;
130
984263bc
MD
131 /*
132 * Do not allow NFS export by non-root users.
133 */
ab2eb4eb 134 if (uap->flags & MNT_EXPORTED) {
895c1f85 135 error = priv_check(td, PRIV_ROOT);
984263bc 136 if (error)
3919ced0 137 goto done;
984263bc
MD
138 }
139 /*
140 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
141 */
895c1f85 142 if (priv_check(td, PRIV_ROOT))
ab2eb4eb 143 uap->flags |= MNT_NOSUID | MNT_NODEV;
21739618 144
984263bc 145 /*
28623bf9 146 * Lookup the requested path and extract the nch and vnode.
984263bc 147 */
21739618
MD
148 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
149 if (error == 0) {
150 if ((error = nlookup(&nd)) == 0) {
28623bf9 151 if (nd.nl_nch.ncp->nc_vp == NULL)
21739618
MD
152 error = ENOENT;
153 }
154 }
155 if (error) {
156 nlookup_done(&nd);
3919ced0 157 goto done;
21739618
MD
158 }
159
8b02b69a
SK
160 /*
161 * If the target filesystem is resolved via a nullfs mount, then
162 * nd.nl_nch.mount will be pointing to the nullfs mount structure
163 * instead of the target file system. We need it in case we are
164 * doing an update.
165 */
166 nullmp = nd.nl_nch.mount;
167
21739618
MD
168 /*
169 * Extract the locked+refd ncp and cleanup the nd structure
170 */
28623bf9
MD
171 nch = nd.nl_nch;
172 cache_zero(&nd.nl_nch);
21739618
MD
173 nlookup_done(&nd);
174
/*
 * Record whether a mount is already registered on this namecache entry.
 * Only the boolean is needed later, so the mount returned by
 * cache_findmount() is handed straight back via cache_dropmount().
 */
6fa06591
MD
175 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
176 (mp = cache_findmount(&nch)) != NULL) {
177 cache_dropmount(mp);
1d505369 178 hasmount = 1;
6fa06591 179 } else {
1d505369 180 hasmount = 0;
6fa06591 181 }
1d505369
MD
182
183
21739618 184 /*
28623bf9 185 * now we have the locked ref'd nch and unreferenced vnode.
21739618 186 */
28623bf9 187 vp = nch.ncp->nc_vp;
87de5057 188 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
28623bf9 189 cache_put(&nch);
3919ced0 190 goto done;
21739618 191 }
28623bf9 192 cache_unlock(&nch);
21739618 193
8b02b69a
SK
194 /*
195 * Extract the file system type. We need to know this early, to take
196 * appropriate actions if we are dealing with a nullfs.
197 */
198 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
199 cache_drop(&nch);
200 vput(vp);
3919ced0 201 goto done;
8b02b69a
SK
202 }
203
21739618 204 /*
28623bf9 205 * Now we have an unlocked ref'd nch and a locked ref'd vp
21739618 206 */
ab2eb4eb 207 if (uap->flags & MNT_UPDATE) {
67863d04 208 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
28623bf9 209 cache_drop(&nch);
984263bc 210 vput(vp);
3919ced0
MD
211 error = EINVAL;
212 goto done;
984263bc 213 }
8b02b69a
SK
214
215 if (strncmp(fstypename, "null", 5) == 0) {
216 KKASSERT(nullmp);
217 mp = nullmp;
218 } else {
219 mp = vp->v_mount;
220 }
221
984263bc
MD
222 flag = mp->mnt_flag;
223 flag2 = mp->mnt_kern_flag;
224 /*
225 * We only allow the filesystem to be reloaded if it
226 * is currently mounted read-only.
227 */
ab2eb4eb 228 if ((uap->flags & MNT_RELOAD) &&
984263bc 229 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
28623bf9 230 cache_drop(&nch);
984263bc 231 vput(vp);
3919ced0
MD
232 error = EOPNOTSUPP; /* Needs translation */
233 goto done;
984263bc
MD
234 }
235 /*
236 * Only root, or the user that did the original mount is
237 * permitted to update it.
238 */
acde96db 239 if (mp->mnt_stat.f_owner != cred->cr_uid &&
895c1f85 240 (error = priv_check(td, PRIV_ROOT))) {
28623bf9 241 cache_drop(&nch);
984263bc 242 vput(vp);
3919ced0 243 goto done;
984263bc 244 }
f9642f56 245 if (vfs_busy(mp, LK_NOWAIT)) {
28623bf9 246 cache_drop(&nch);
984263bc 247 vput(vp);
3919ced0
MD
248 error = EBUSY;
249 goto done;
984263bc 250 }
/* A mount is registered on this entry: refuse the update with EBUSY. */
1c8efd4d 251 if (hasmount) {
28623bf9 252 cache_drop(&nch);
f9642f56 253 vfs_unbusy(mp);
984263bc 254 vput(vp);
3919ced0
MD
255 error = EBUSY;
256 goto done;
984263bc 257 }
984263bc 258 mp->mnt_flag |=
ab2eb4eb 259 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
ace53c28 260 lwkt_gettoken(&mp->mnt_token);
a11aaa81 261 vn_unlock(vp);
87f62b1c 262 vfsp = mp->mnt_vfc;
984263bc
MD
263 goto update;
264 }
8a70992d 265
984263bc
MD
266 /*
267 * If the user is not root, ensure that they own the directory
268 * onto which we are attempting to mount.
269 */
87de5057 270 if ((error = VOP_GETATTR(vp, &va)) ||
8a70992d
MD
271 (va.va_uid != cred->cr_uid &&
272 (error = priv_check(td, PRIV_ROOT)))) {
28623bf9 273 cache_drop(&nch);
984263bc 274 vput(vp);
3919ced0 275 goto done;
984263bc 276 }
87de5057 277 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
28623bf9 278 cache_drop(&nch);
984263bc 279 vput(vp);
3919ced0 280 goto done;
984263bc
MD
281 }
282 if (vp->v_type != VDIR) {
28623bf9 283 cache_drop(&nch);
984263bc 284 vput(vp);
3919ced0
MD
285 error = ENOTDIR;
286 goto done;
984263bc 287 }
5dc91765
MD
288 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
289 cache_drop(&nch);
290 vput(vp);
3919ced0
MD
291 error = EPERM;
292 goto done;
5dc91765 293 }
2613053d 294 vfsp = vfsconf_find_by_name(fstypename);
984263bc
MD
295 if (vfsp == NULL) {
296 linker_file_t lf;
297
298 /* Only load modules for root (very important!) */
895c1f85 299 if ((error = priv_check(td, PRIV_ROOT)) != 0) {
28623bf9 300 cache_drop(&nch);
984263bc 301 vput(vp);
3919ced0 302 goto done;
984263bc 303 }
1c0e3286 304 error = linker_load_file(fstypename, &lf);
984263bc 305 if (error || lf == NULL) {
28623bf9 306 cache_drop(&nch);
984263bc
MD
307 vput(vp);
308 if (lf == NULL)
309 error = ENODEV;
3919ced0 310 goto done;
984263bc
MD
311 }
312 lf->userrefs++;
313 /* lookup again, see if the VFS was loaded */
2613053d 314 vfsp = vfsconf_find_by_name(fstypename);
984263bc
MD
315 if (vfsp == NULL) {
316 lf->userrefs--;
317 linker_file_unload(lf);
28623bf9 318 cache_drop(&nch);
984263bc 319 vput(vp);
3919ced0
MD
320 error = ENODEV;
321 goto done;
984263bc
MD
322 }
323 }
1c8efd4d 324 if (hasmount) {
28623bf9 325 cache_drop(&nch);
984263bc 326 vput(vp);
3919ced0
MD
327 error = EBUSY;
328 goto done;
984263bc 329 }
984263bc
MD
330
331 /*
332 * Allocate and initialize the filesystem.
333 */
efda3bd0 334 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
aac0aabd 335 mount_init(mp);
/*
 * NOTE(review): the vfs_busy() result is ignored here; presumably it
 * cannot fail on a freshly initialized mount -- confirm.
 */
f9642f56 336 vfs_busy(mp, LK_NOWAIT);
984263bc
MD
337 mp->mnt_op = vfsp->vfc_vfsops;
338 mp->mnt_vfc = vfsp;
d84f6fa1 339 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
984263bc
MD
340 vfsp->vfc_refcount++;
341 mp->mnt_stat.f_type = vfsp->vfc_typenum;
342 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
/*
 * NOTE(review): strncpy() leaves f_fstypename unterminated if vfc_name
 * occupies all MFSNAMELEN bytes; this relies on vfc_name being shorter
 * -- confirm.
 */
343 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
acde96db 344 mp->mnt_stat.f_owner = cred->cr_uid;
ace53c28 345 lwkt_gettoken(&mp->mnt_token);
a11aaa81 346 vn_unlock(vp);
984263bc
MD
347update:
348 /*
ace53c28
MD
349 * (per-mount token acquired at this point)
350 *
984263bc
MD
351 * Set the mount level flags.
352 */
ab2eb4eb 353 if (uap->flags & MNT_RDONLY)
984263bc
MD
354 mp->mnt_flag |= MNT_RDONLY;
355 else if (mp->mnt_flag & MNT_RDONLY)
356 mp->mnt_kern_flag |= MNTK_WANTRDWR;
357 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
55358b98 358 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
e0fb398b 359 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
e2950f41
TK
360 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
361 MNT_AUTOMOUNTED);
ab2eb4eb 362 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
55358b98 363 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
e0fb398b 364 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
e2950f41
TK
365 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
366 MNT_AUTOMOUNTED);
87f62b1c
MD
367
368 /*
369 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
370 * This way the initial VFS_MOUNT() call will also be MPSAFE.
371 */
372 if (vfsp->vfc_flags & VFCF_MPSAFE)
373 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
374
984263bc
MD
375 /*
376 * Mount the filesystem.
377 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
fad57d0e 378 * get.
984263bc 379 */
acde96db 380 error = VFS_MOUNT(mp, uap->path, uap->data, cred);
/*
 * Update path: clear the transient update flags (switching to read-write
 * if MNTK_WANTRDWR was set), restore the saved flags on failure, then
 * release the token, the busy state, the vnode and the nch, and return.
 */
984263bc
MD
381 if (mp->mnt_flag & MNT_UPDATE) {
382 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
383 mp->mnt_flag &= ~MNT_RDONLY;
384 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
385 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
386 if (error) {
387 mp->mnt_flag = flag;
388 mp->mnt_kern_flag = flag2;
389 }
ace53c28 390 lwkt_reltoken(&mp->mnt_token);
f9642f56 391 vfs_unbusy(mp);
984263bc 392 vrele(vp);
28623bf9 393 cache_drop(&nch);
3919ced0 394 goto done;
984263bc 395 }
ca466bae 396 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
8a70992d 397
984263bc 398 /*
21739618 399 * Put the new filesystem on the mount list after root. The mount
28623bf9
MD
400 * point gets its own mnt_ncmountpt (unless the VFS already set one
401 * up) which represents the root of the mount. The lookup code
402 * detects the mount point going forward and checks the root of
403 * the mount going backwards.
8e005a45
MD
404 *
405 * It is not necessary to invalidate or purge the vnode underneath
406 * because elements under the mount will be given their own glue
407 * namecache record.
984263bc 408 */
984263bc 409 if (!error) {
28623bf9
MD
410 if (mp->mnt_ncmountpt.ncp == NULL) {
411 /*
a9d06cca
MD
412 * Allocate, then unlock, but leave the ref intact.
413 * This is the mnt_refs (1) that we will retain
414 * through to the unmount.
28623bf9
MD
415 */
416 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
417 cache_unlock(&mp->mnt_ncmountpt);
418 }
fda4c5f3 419 vn_unlock(vp);
28623bf9 420 mp->mnt_ncmounton = nch; /* inherits ref */
fda4c5f3 421 cache_lock(&nch);
28623bf9 422 nch.ncp->nc_flag |= NCF_ISMOUNTPT;
fda4c5f3 423 cache_unlock(&nch);
bd7598aa 424 cache_ismounting(mp);
fda4c5f3 425 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
28623bf9 426
861905fb 427 mountlist_insert(mp, MNTINS_LAST);
a11aaa81 428 vn_unlock(vp);
c0721f5f 429 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
/*
 * NOTE(review): this error value is overwritten by VFS_START() below, so
 * a vfs_allocate_syncvnode() failure is never reported to the caller --
 * confirm this is intended.
 */
41a01a4d 430 error = vfs_allocate_syncvnode(mp);
ace53c28 431 lwkt_reltoken(&mp->mnt_token);
f9642f56 432 vfs_unbusy(mp);
1d505369
MD
433 error = VFS_START(mp, 0);
434 vrele(vp);
5e25370d 435 KNOTE(&fs_klist, VQ_MOUNT);
984263bc 436 } else {
/*
 * VFS_MOUNT() failed for a new mount: undo the setup done above and free
 * the mount structure.
 */
92f61d6d 437 vn_syncer_thr_stop(mp);
66a1ddf5
MD
438 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
439 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
440 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
441 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
442 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
984263bc 443 mp->mnt_vfc->vfc_refcount--;
ace53c28 444 lwkt_reltoken(&mp->mnt_token);
f9642f56 445 vfs_unbusy(mp);
efda3bd0 446 kfree(mp, M_MOUNT);
28623bf9 447 cache_drop(&nch);
984263bc
MD
448 vput(vp);
449 }
3919ced0 450done:
984263bc
MD
451 return (error);
452}
453
454/*
455 * Scan all active processes to see if any of them have a current
456 * or root directory onto which the new filesystem has just been
457 * mounted. If so, replace them with the new mount point.
21739618 458 *
8a70992d
MD
459 * Both old_nch and new_nch are ref'd on call but not locked.
460 * new_nch must be temporarily locked so it can be associated with the
461 * vnode representing the root of the mount point.
984263bc 462 */
/*
 * Argument bundle passed from checkdirs() to checkdirs_callback() through
 * allproc_scan().
 */
8fa76237 463struct checkdirs_info {
77480a97
MD
/* namecache handle that was mounted on; matched against fd_ncdir/fd_nrdir */
464 struct nchandle old_nch;
/* namecache handle of the new mount's root; copied into matching processes */
465 struct nchandle new_nch;
/* NOTE(review): neither set by checkdirs() nor read by the callback here */
466 struct vnode *old_vp;
/* root vnode of the new mount; installed as fd_cdir/fd_rdir on a match */
467 struct vnode *new_vp;
8fa76237
MD
468};
469
/* Per-process callback used by checkdirs(); data is a struct checkdirs_info. */
470static int checkdirs_callback(struct proc *p, void *data);
471
984263bc 472static void
77480a97 473checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
984263bc 474{
8fa76237 475 struct checkdirs_info info;
77480a97 476 struct vnode *olddp;
984263bc 477 struct vnode *newdp;
690a3127 478 struct mount *mp;
984263bc 479
77480a97
MD
480 /*
481 * If the old mount point's vnode has a usecount of 1, it is not
482 * being held as a descriptor anywhere.
483 */
484 olddp = old_nch->ncp->nc_vp;
ee173d09 485 if (olddp == NULL || VREFCNT(olddp) == 1)
984263bc 486 return;
77480a97
MD
487
488 /*
489 * Force the root vnode of the new mount point to be resolved
490 * so we can update any matching processes.
491 */
492 mp = new_nch->mount;
690a3127 493 if (VFS_ROOT(mp, &newdp))
984263bc 494 panic("mount: lost mount");
e15f6c4d 495 vn_unlock(newdp);
8a70992d 496 cache_lock(new_nch);
e15f6c4d 497 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
77480a97
MD
498 cache_setunresolved(new_nch);
499 cache_setvp(new_nch, newdp);
8a70992d 500 cache_unlock(new_nch);
21739618 501
77480a97
MD
502 /*
503 * Special handling of the root node
504 */
21739618
MD
505 if (rootvnode == olddp) {
506 vref(newdp);
77480a97 507 vfs_cache_setroot(newdp, cache_hold(new_nch));
21739618
MD
508 }
509
77480a97
MD
510 /*
511 * Pass newdp separately so the callback does not have to access
512 * it via new_nch->ncp->nc_vp.
513 */
514 info.old_nch = *old_nch;
515 info.new_nch = *new_nch;
516 info.new_vp = newdp;
586c4308 517 allproc_scan(checkdirs_callback, &info, 0);
8fa76237
MD
518 vput(newdp);
519}
520
521/*
522 * NOTE: callback is not MP safe because the scanned process's filedesc
523 * structure can be ripped out from under us, amoung other things.
524 */
525static int
526checkdirs_callback(struct proc *p, void *data)
527{
528 struct checkdirs_info *info = data;
529 struct filedesc *fdp;
28623bf9
MD
530 struct nchandle ncdrop1;
531 struct nchandle ncdrop2;
8fa76237
MD
532 struct vnode *vprele1;
533 struct vnode *vprele2;
534
535 if ((fdp = p->p_fd) != NULL) {
28623bf9
MD
536 cache_zero(&ncdrop1);
537 cache_zero(&ncdrop2);
8fa76237
MD
538 vprele1 = NULL;
539 vprele2 = NULL;
540
541 /*
542 * MPUNSAFE - XXX fdp can be pulled out from under a
543 * foreign process.
544 *
545 * A shared filedesc is ok, we don't have to copy it
546 * because we are making this change globally.
547 */
287a8577 548 spin_lock(&fdp->fd_spin);
77480a97
MD
549 if (fdp->fd_ncdir.mount == info->old_nch.mount &&
550 fdp->fd_ncdir.ncp == info->old_nch.ncp) {
8fa76237 551 vprele1 = fdp->fd_cdir;
77480a97
MD
552 vref(info->new_vp);
553 fdp->fd_cdir = info->new_vp;
8fa76237 554 ncdrop1 = fdp->fd_ncdir;
77480a97 555 cache_copy(&info->new_nch, &fdp->fd_ncdir);
984263bc 556 }
77480a97
MD
557 if (fdp->fd_nrdir.mount == info->old_nch.mount &&
558 fdp->fd_nrdir.ncp == info->old_nch.ncp) {
8fa76237 559 vprele2 = fdp->fd_rdir;
77480a97
MD
560 vref(info->new_vp);
561 fdp->fd_rdir = info->new_vp;
8fa76237 562 ncdrop2 = fdp->fd_nrdir;
77480a97 563 cache_copy(&info->new_nch, &fdp->fd_nrdir);
984263bc 564 }
287a8577 565 spin_unlock(&fdp->fd_spin);
28623bf9
MD
566 if (ncdrop1.ncp)
567 cache_drop(&ncdrop1);
568 if (ncdrop2.ncp)
569 cache_drop(&ncdrop2);
8fa76237
MD
570 if (vprele1)
571 vrele(vprele1);
572 if (vprele2)
573 vrele(vprele2);
984263bc 574 }
8fa76237 575 return(0);
984263bc
MD
576}
577
578/*
579 * Unmount a file system.
580 *
581 * Note: unmount takes a path to the vnode mounted on as argument,
582 * not special file (as before).
3919ced0 583 *
41c20dac 584 * umount_args(char *path, int flags)
3919ced0
MD
585 *
586 * MPALMOSTSAFE
41c20dac 587 */
984263bc 588int
753fd850 589sys_unmount(struct unmount_args *uap)
984263bc 590{
dadab5e9 591 struct thread *td = curthread;
f64b567c 592 struct proc *p __debugvar = td->td_proc;
d2293868 593 struct mount *mp = NULL;
fad57d0e 594 struct nlookupdata nd;
3919ced0 595 int error;
984263bc 596
dadab5e9 597 KKASSERT(p);
9910d07b 598 if (td->td_ucred->cr_prison != NULL) {
3919ced0
MD
599 error = EPERM;
600 goto done;
601 }
895c1f85 602 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
3919ced0 603 goto done;
84c3a710 604
ab2eb4eb 605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
fad57d0e
MD
606 if (error == 0)
607 error = nlookup(&nd);
fad57d0e 608 if (error)
d2293868 609 goto out;
fad57d0e 610
28623bf9 611 mp = nd.nl_nch.mount;
984263bc
MD
612
613 /*
614 * Only root, or the user that did the original mount is
615 * permitted to unmount this filesystem.
616 */
9910d07b 617 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
895c1f85 618 (error = priv_check(td, PRIV_ROOT)))
d2293868 619 goto out;
984263bc
MD
620
621 /*
622 * Don't allow unmounting the root file system.
623 */
624 if (mp->mnt_flag & MNT_ROOTFS) {
d2293868
MD
625 error = EINVAL;
626 goto out;
984263bc
MD
627 }
628
629 /*
630 * Must be the root of the filesystem
631 */
28623bf9 632 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
d2293868
MD
633 error = EINVAL;
634 goto out;
984263bc 635 }
d2293868 636
a9d06cca
MD
637 /*
638 * If no error try to issue the unmount. We lose our cache
639 * ref when we call nlookup_done so we must hold the mount point
640 * to prevent use-after-free races.
641 */
d2293868 642out:
a9d06cca
MD
643 if (error == 0) {
644 mount_hold(mp);
645 nlookup_done(&nd);
51a529db 646 error = dounmount(mp, uap->flags, 0);
a9d06cca
MD
647 mount_drop(mp);
648 } else {
649 nlookup_done(&nd);
650 }
3919ced0 651done:
3919ced0 652 return (error);
984263bc
MD
653}
654
655/*
a9d06cca
MD
656 * Do the actual file system unmount (interlocked against the mountlist
657 * token and mp->mnt_token).
984263bc 658 */
861905fb
MD
659static int
660dounmount_interlock(struct mount *mp)
661{
662 if (mp->mnt_kern_flag & MNTK_UNMOUNT)
663 return (EBUSY);
664 mp->mnt_kern_flag |= MNTK_UNMOUNT;
665 return(0);
666}
667
32b7c8fd
AH
668static int
669unmount_allproc_cb(struct proc *p, void *arg)
670{
671 struct mount *mp;
672
673 if (p->p_textnch.ncp == NULL)
674 return 0;
675
676 mp = (struct mount *)arg;
677 if (p->p_textnch.mount == mp)
678 cache_drop(&p->p_textnch);
679
680 return 0;
681}
682
a9d06cca
MD
683/*
 * The guts of the unmount code. The mount owns one ref and one hold
 * count. If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 *
 * mp      - the mount to take down.
 * flags   - MNT_* flags.  With MNT_FORCE the unmount proceeds even when
 *           namecache or mount references remain; in that case the mount
 *           structure is not freed (freeok is cleared).
 * halting - non-zero during system halt; together with MNTK_QUICKHALT on
 *           the mount it selects the shortcut described above.
 *
 * Returns 0 on success.  Otherwise returns an errno value: the result of
 * the interlock or lockmgr() failing, EBUSY when references remain and
 * MNT_FORCE is not set, or the error from VFS_SYNC() / VFS_UNMOUNT().
 * On error the syncer vnode and the saved MNT_ASYNC flag are restored.
 */
984263bc 694int
51a529db 695dounmount(struct mount *mp, int flags, int halting)
984263bc 696{
dcf26f79 697 struct namecache *ncp;
28623bf9 698 struct nchandle nch;
2ec4b00d 699 struct vnode *vp;
984263bc
MD
700 int error;
701 int async_flag;
2df9419c 702 int lflags;
28623bf9 703 int freeok = 1;
fdbec6d3 704 int retry;
51a529db 705 int quickhalt;
984263bc 706
ace53c28 707 lwkt_gettoken(&mp->mnt_token);
a9d06cca 708
51a529db
MD
709 /*
710 * When halting, certain mount points can essentially just
711 * be unhooked and otherwise ignored.
712 */
713 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
714 quickhalt = 1;
715 freeok = 0;
716 } else {
717 quickhalt = 0;
718 }
719
720
861905fb 721 /*
a9d06cca 722 * Exclusive access for unmounting purposes.
861905fb
MD
723 */
724 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
effb40c8 725 goto out;
861905fb
MD
726
727 /*
a9d06cca
MD
728 * We now 'own' the last mp->mnt_refs
729 *
861905fb
MD
730 * Allow filesystems to detect that a forced unmount is in progress.
731 */
984263bc
MD
732 if (flags & MNT_FORCE)
733 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
/* A forced unmount takes mnt_lock without LK_TIMELOCK. */
fdbec6d3 734 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
df4f70a6 735 error = lockmgr(&mp->mnt_lock, lflags);
984263bc
MD
736 if (error) {
737 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
ace53c28
MD
738 if (mp->mnt_kern_flag & MNTK_MWAIT) {
739 mp->mnt_kern_flag &= ~MNTK_MWAIT;
e38462a2 740 wakeup(mp);
ace53c28 741 }
effb40c8 742 goto out;
984263bc
MD
743 }
744
745 if (mp->mnt_flag & MNT_EXPUBLIC)
746 vfs_setpublicfs(NULL, NULL, NULL);
747
748 vfs_msync(mp, MNT_WAIT);
/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
749 async_flag = mp->mnt_flag & MNT_ASYNC;
750 mp->mnt_flag &=~ MNT_ASYNC;
dcf26f79
MD
751
752 /*
28623bf9
MD
753 * If this filesystem isn't aliasing other filesystems,
754 * try to invalidate any remaining namecache entries and
755 * check the count afterwards.
a9d06cca
MD
756 *
757 * We own the last mnt_refs by owning mnt_ncmountpt.
dcf26f79 758 */
28623bf9
MD
759 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
760 cache_lock(&mp->mnt_ncmountpt);
761 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
762 cache_unlock(&mp->mnt_ncmountpt);
763
3536c341 764 cache_clearmntcache();
32b7c8fd
AH
765 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
766 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
586c4308 767 allproc_scan(&unmount_allproc_cb, mp, 0);
32b7c8fd
AH
768 }
769
3536c341 770 cache_clearmntcache();
28623bf9
MD
771 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
772 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
dcf26f79
MD
773
774 if ((flags & MNT_FORCE) == 0) {
775 error = EBUSY;
28623bf9
MD
776 mount_warning(mp, "Cannot unmount: "
777 "%d namecache "
778 "references still "
779 "present",
780 ncp->nc_refs - 1);
dcf26f79 781 } else {
28623bf9
MD
782 mount_warning(mp, "Forced unmount: "
783 "%d namecache "
784 "references still "
785 "present",
786 ncp->nc_refs - 1);
787 freeok = 0;
dcf26f79
MD
788 }
789 }
790 }
791
fdbec6d3
MD
792 /*
793 * Decommission our special mnt_syncer vnode. This also stops
794 * the vnlru code. If we are unable to unmount we recommission
795 * the vnode.
796 *
797 * Then sync the filesystem.
798 */
799 if ((vp = mp->mnt_syncer) != NULL) {
800 mp->mnt_syncer = NULL;
ee173d09 801 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
fdbec6d3
MD
802 vrele(vp);
803 }
51a529db
MD
804
805 if (quickhalt == 0) {
806 if ((mp->mnt_flag & MNT_RDONLY) == 0)
807 VFS_SYNC(mp, MNT_WAIT);
808 }
fdbec6d3 809
28623bf9
MD
810 /*
811 * nchandle records ref the mount structure. Expect a count of 1
812 * (our mount->mnt_ncmountpt).
fdbec6d3
MD
813 *
814 * Scans can get temporary refs on a mountpoint (though really
815 * heavy duty stuff like cache_findmount() do not).
28623bf9 816 */
3536c341
MD
817 if (mp->mnt_refs != 1)
818 cache_clearmntcache();
/* Give transient references up to 10 short sleeps to drain. */
fdbec6d3
MD
819 for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
820 cache_unmounting(mp);
821 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
3536c341 822 cache_clearmntcache();
fdbec6d3 823 }
28623bf9
MD
824 if (mp->mnt_refs != 1) {
825 if ((flags & MNT_FORCE) == 0) {
826 mount_warning(mp, "Cannot unmount: "
fdbec6d3 827 "%d mount refs still present",
9629eb35 828 mp->mnt_refs - 1);
28623bf9
MD
829 error = EBUSY;
830 } else {
831 mount_warning(mp, "Forced unmount: "
fdbec6d3 832 "%d mount refs still present",
9629eb35 833 mp->mnt_refs - 1);
28623bf9
MD
834 freeok = 0;
835 }
836 }
837
2ec4b00d 838 /*
fdbec6d3
MD
839 * So far so good, sync the filesystem once more and
840 * call the VFS unmount code if the sync succeeds.
2ec4b00d 841 */
51a529db 842 if (error == 0 && quickhalt == 0) {
ef560bee 843 if (mp->mnt_flag & MNT_RDONLY) {
dcf26f79 844 error = VFS_UNMOUNT(mp, flags);
ef560bee
TK
845 } else {
846 error = VFS_SYNC(mp, MNT_WAIT);
847 if ((error == 0) ||
848 (error == EOPNOTSUPP) || /* No sync */
849 (flags & MNT_FORCE)) {
850 error = VFS_UNMOUNT(mp, flags);
851 }
dcf26f79
MD
852 }
853 }
fdbec6d3
MD
854
855 /*
856 * If an error occurred we can still recover, restoring the
857 * syncer vnode and misc flags.
858 */
984263bc 859 if (error) {
41a01a4d
MD
860 if (mp->mnt_syncer == NULL)
861 vfs_allocate_syncvnode(mp);
984263bc
MD
862 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
863 mp->mnt_flag |= async_flag;
df4f70a6 864 lockmgr(&mp->mnt_lock, LK_RELEASE);
ace53c28
MD
865 if (mp->mnt_kern_flag & MNTK_MWAIT) {
866 mp->mnt_kern_flag &= ~MNTK_MWAIT;
e38462a2 867 wakeup(mp);
ace53c28 868 }
effb40c8 869 goto out;
984263bc 870 }
432b8263
MD
871 /*
872 * Clean up any journals still associated with the mount after
873 * filesystem activity has ceased.
874 */
875 journal_remove_all_journals(mp,
876 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
877
861905fb 878 mountlist_remove(mp);
0961aa92
MD
879
880 /*
881 * Remove any installed vnode ops here so the individual VFSs don't
882 * have to.
a9d06cca
MD
883 *
884 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
ba92154c
MD
885 *
886 * When quickhalting we have to keep these intact because the
887 * underlying vnodes have not been destroyed, and some might be
888 * dirty.
0961aa92 889 */
ba92154c
MD
890 if (quickhalt == 0) {
891 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
892 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
893 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
894 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
895 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
896 }
0961aa92 897
28623bf9
MD
898 if (mp->mnt_ncmountpt.ncp != NULL) {
899 nch = mp->mnt_ncmountpt;
900 cache_zero(&mp->mnt_ncmountpt);
901 cache_clrmountpt(&nch);
902 cache_drop(&nch);
903 }
904 if (mp->mnt_ncmounton.ncp != NULL) {
07baed26 905 cache_unmounting(mp);
28623bf9
MD
906 nch = mp->mnt_ncmounton;
907 cache_zero(&mp->mnt_ncmounton);
908 cache_clrmountpt(&nch);
909 cache_drop(&nch);
984263bc 910 }
1d505369 911
984263bc 912 mp->mnt_vfc->vfc_refcount--;
51a529db
MD
913
914 /*
915 * If not quickhalting the mount, we expect there to be no
916 * vnodes left.
917 */
918 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
984263bc 919 panic("unmount: dangling vnode");
a9d06cca
MD
920
921 /*
922 * Release the lock
923 */
df4f70a6 924 lockmgr(&mp->mnt_lock, LK_RELEASE);
ace53c28
MD
925 if (mp->mnt_kern_flag & MNTK_MWAIT) {
926 mp->mnt_kern_flag &= ~MNTK_MWAIT;
e38462a2 927 wakeup(mp);
ace53c28
MD
928 }
929
930 /*
931 * If we reach here and freeok != 0 we must free the mount.
a9d06cca
MD
932 * mnt_refs should already have dropped to 0, so if it is not
933 * zero we must cycle the caches and wait.
934 *
935 * When we are satisfied that the mount has disconnected we can
936 * drop the hold on the mp that represented the mount (though the
937 * caller might actually have another, so the caller's drop may
938 * do the actual free).
ace53c28
MD
939 */
940 if (freeok) {
a9d06cca 941 if (mp->mnt_refs > 0)
3536c341 942 cache_clearmntcache();
a9d06cca 943 while (mp->mnt_refs > 0) {
07baed26 944 cache_unmounting(mp);
ace53c28
MD
945 wakeup(mp);
946 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
3536c341 947 cache_clearmntcache();
ace53c28
MD
948 }
949 lwkt_reltoken(&mp->mnt_token);
9629eb35 950 mount_drop(mp);
ace53c28 951 mp = NULL;
ba92154c
MD
952 } else {
953 cache_clearmntcache();
ace53c28 954 }
/* The mount is off the mountlist by now; report success. */
effb40c8 955 error = 0;
5e25370d 956 KNOTE(&fs_klist, VQ_UNMOUNT);
effb40c8 957out:
/*
 * mp is NULL only when the freeok path above already released the token
 * and dropped the mount; otherwise the token is still held here.
 */
ace53c28
MD
958 if (mp)
959 lwkt_reltoken(&mp->mnt_token);
effb40c8 960 return (error);
984263bc
MD
961}
962
28623bf9
MD
963static
964void
965mount_warning(struct mount *mp, const char *ctl, ...)
966{
967 char *ptr;
968 char *buf;
969 __va_list va;
970
971 __va_start(va, ctl);
08a7d6d8
MD
972 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
973 &ptr, &buf, 0) == 0) {
6ea70f76 974 kprintf("unmount(%s): ", ptr);
379210cb 975 kvprintf(ctl, va);
6ea70f76 976 kprintf("\n");
28623bf9
MD
977 kfree(buf, M_TEMP);
978 } else {
56bcacad
MD
979 kprintf("unmount(%p", mp);
980 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
981 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
982 kprintf("): ");
379210cb 983 kvprintf(ctl, va);
6ea70f76 984 kprintf("\n");
28623bf9
MD
985 }
986 __va_end(va);
987}
988
d3c546e6
MD
989/*
990 * Shim cache_fullpath() to handle the case where a process is chrooted into
991 * a subdirectory of a mount. In this case if the root mount matches the
992 * process root directory's mount we have to specify the process's root
993 * directory instead of the mount point, because the mount point might
994 * be above the root directory.
995 */
996static
997int
998mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
999{
1000 struct nchandle *nch;
d3c546e6
MD
1001
1002 if (p && p->p_fd->fd_nrdir.mount == mp)
1003 nch = &p->p_fd->fd_nrdir;
1004 else
1005 nch = &mp->mnt_ncmountpt;
08a7d6d8 1006 return(cache_fullpath(p, nch, NULL, rb, fb, 0));
d3c546e6
MD
1007}
1008
984263bc
MD
1009/*
1010 * Sync each mounted filesystem.
1011 */
984263bc
MD
1012
#ifdef DEBUG
/* Read/write integer exported under the _debug sysctl node as "syncprt" */
static int syncprt;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif	/* DEBUG */
984263bc 1017
861905fb
MD
1018static int sync_callback(struct mount *mp, void *data);
1019
984263bc 1020int
753fd850 1021sys_sync(struct sync_args *uap)
984263bc 1022{
861905fb 1023 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
984263bc
MD
1024 return (0);
1025}
1026
861905fb
MD
1027static
1028int
1029sync_callback(struct mount *mp, void *data __unused)
1030{
1031 int asyncflag;
1032
1033 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fb578eac 1034 lwkt_gettoken(&mp->mnt_token);
861905fb
MD
1035 asyncflag = mp->mnt_flag & MNT_ASYNC;
1036 mp->mnt_flag &= ~MNT_ASYNC;
fb578eac 1037 lwkt_reltoken(&mp->mnt_token);
861905fb 1038 vfs_msync(mp, MNT_NOWAIT);
904805ea 1039 VFS_SYNC(mp, MNT_NOWAIT);
fb578eac 1040 lwkt_gettoken(&mp->mnt_token);
861905fb 1041 mp->mnt_flag |= asyncflag;
fb578eac 1042 lwkt_reltoken(&mp->mnt_token);
861905fb
MD
1043 }
1044 return(0);
1045}
1046
984263bc
MD
/*
 * While this is zero, sys_quotactl() fails with EPERM for any caller
 * whose credential has a prison attached.  The sysctl that would export
 * it is compiled out.
 *
 * XXX PRISON: could be per prison flag
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
1052
1053/*
41c20dac
MD
1054 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
1055 *
984263bc 1056 * Change filesystem quotas.
3919ced0
MD
1057 *
1058 * MPALMOSTSAFE
984263bc 1059 */
984263bc 1060int
753fd850 1061sys_quotactl(struct quotactl_args *uap)
984263bc 1062{
fad57d0e
MD
1063 struct nlookupdata nd;
1064 struct thread *td;
41c20dac 1065 struct mount *mp;
984263bc 1066 int error;
984263bc 1067
fad57d0e 1068 td = curthread;
9910d07b 1069 if (td->td_ucred->cr_prison && !prison_quotas) {
3919ced0
MD
1070 error = EPERM;
1071 goto done;
1072 }
fad57d0e 1073
ab2eb4eb 1074 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
fad57d0e
MD
1075 if (error == 0)
1076 error = nlookup(&nd);
1077 if (error == 0) {
28623bf9 1078 mp = nd.nl_nch.mount;
ab2eb4eb 1079 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
acde96db 1080 uap->arg, nd.nl_cred);
fad57d0e
MD
1081 }
1082 nlookup_done(&nd);
3919ced0 1083done:
fad57d0e 1084 return (error);
984263bc
MD
1085}
1086
949ecb9b 1087/*
2281065e 1088 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
949ecb9b
MD
1089 * void *buf, int buflen)
1090 *
1091 * This function operates on a mount point and executes the specified
1092 * operation using the specified control data, and possibly returns data.
1093 *
1094 * The actual number of bytes stored in the result buffer is returned, 0
1095 * if none, otherwise an error is returned.
3919ced0
MD
1096 *
1097 * MPALMOSTSAFE
949ecb9b 1098 */
949ecb9b 1099int
753fd850 1100sys_mountctl(struct mountctl_args *uap)
949ecb9b
MD
1101{
1102 struct thread *td = curthread;
2281065e 1103 struct file *fp;
949ecb9b
MD
1104 void *ctl = NULL;
1105 void *buf = NULL;
1106 char *path = NULL;
1107 int error;
1108
1109 /*
1110 * Sanity and permissions checks. We must be root.
1111 */
9910d07b 1112 if (td->td_ucred->cr_prison != NULL)
949ecb9b 1113 return (EPERM);
dad088a5
MD
1114 if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
1115 (error = priv_check(td, PRIV_ROOT)) != 0)
949ecb9b
MD
1116 return (error);
1117
1118 /*
1119 * Argument length checks
1120 */
39b13188 1121 if (uap->ctllen < 0 || uap->ctllen > 1024)
949ecb9b 1122 return (EINVAL);
39b13188 1123 if (uap->buflen < 0 || uap->buflen > 16 * 1024)
949ecb9b
MD
1124 return (EINVAL);
1125 if (uap->path == NULL)
1126 return (EINVAL);
1127
1128 /*
1129 * Allocate the necessary buffers and copyin data
1130 */
70aac194 1131 path = objcache_get(namei_oc, M_WAITOK);
949ecb9b
MD
1132 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1133 if (error)
1134 goto done;
1135
1136 if (uap->ctllen) {
efda3bd0 1137 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
949ecb9b
MD
1138 error = copyin(uap->ctl, ctl, uap->ctllen);
1139 if (error)
1140 goto done;
1141 }
1142 if (uap->buflen)
efda3bd0 1143 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
949ecb9b 1144
2281065e
MD
1145 /*
1146 * Validate the descriptor
1147 */
ecda6326 1148 if (uap->fd >= 0) {
35949930 1149 fp = holdfp(td, uap->fd, -1);
ecda6326
MD
1150 if (fp == NULL) {
1151 error = EBADF;
1152 goto done;
1153 }
1154 } else {
1155 fp = NULL;
2281065e 1156 }
2281065e 1157
949ecb9b
MD
1158 /*
1159 * Execute the internal kernel function and clean up.
1160 */
35949930
MD
1161 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
1162 buf, uap->buflen, &uap->sysmsg_result);
2281065e 1163 if (fp)
35949930 1164 dropfp(td, uap->fd, fp);
949ecb9b
MD
1165 if (error == 0 && uap->sysmsg_result > 0)
1166 error = copyout(buf, uap->buf, uap->sysmsg_result);
1167done:
1168 if (path)
70aac194 1169 objcache_put(namei_oc, path);
949ecb9b 1170 if (ctl)
efda3bd0 1171 kfree(ctl, M_TEMP);
949ecb9b 1172 if (buf)
efda3bd0 1173 kfree(buf, M_TEMP);
949ecb9b
MD
1174 return (error);
1175}
1176
1177/*
1178 * Execute a mount control operation by resolving the path to a mount point
1179 * and calling vop_mountctl().
67863d04
MD
1180 *
1181 * Use the mount point from the nch instead of the vnode so nullfs mounts
1182 * can properly spike the VOP.
949ecb9b
MD
1183 */
1184int
2281065e
MD
1185kern_mountctl(const char *path, int op, struct file *fp,
1186 const void *ctl, int ctllen,
949ecb9b
MD
1187 void *buf, int buflen, int *res)
1188{
949ecb9b 1189 struct vnode *vp;
949ecb9b 1190 struct nlookupdata nd;
fda4c5f3 1191 struct nchandle nch;
5a3a4cf3 1192 struct mount *mp;
949ecb9b
MD
1193 int error;
1194
1195 *res = 0;
1196 vp = NULL;
1197 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
949ecb9b
MD
1198 if (error)
1199 return (error);
fda4c5f3
MD
1200 error = nlookup(&nd);
1201 if (error) {
1202 nlookup_done(&nd);
1203 return (error);
1204 }
1205 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
1206 if (error) {
1207 nlookup_done(&nd);
1208 return (error);
1209 }
1210
1211 /*
1212 * Yes, all this is needed to use the nch.mount below, because
1213 * we must maintain a ref on the mount to avoid ripouts (e.g.
1214 * due to heavy mount/unmount use by synth or poudriere).
1215 */
1216 nch = nd.nl_nch;
1217 cache_zero(&nd.nl_nch);
1218 cache_unlock(&nch);
1219 nlookup_done(&nd);
d246c910 1220 vn_unlock(vp);
949ecb9b 1221
5a3a4cf3
MD
1222 mp = nch.mount;
1223
949ecb9b
MD
1224 /*
1225 * Must be the root of the filesystem
1226 */
67863d04 1227 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
fda4c5f3 1228 cache_drop(&nch);
d246c910 1229 vrele(vp);
949ecb9b
MD
1230 return (EINVAL);
1231 }
5a3a4cf3
MD
1232 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
1233 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
1234 path);
1235 cache_drop(&nch);
1236 vrele(vp);
1237 return (EINVAL);
1238 }
1239 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
aac0aabd 1240 buf, buflen, res);
d246c910 1241 vrele(vp);
fda4c5f3
MD
1242 cache_drop(&nch);
1243
949ecb9b
MD
1244 return (error);
1245}
1246
984263bc 1247int
fad57d0e 1248kern_statfs(struct nlookupdata *nd, struct statfs *buf)
984263bc 1249{
dadab5e9 1250 struct thread *td = curthread;
75ffff0d 1251 struct proc *p = td->td_proc;
41c20dac
MD
1252 struct mount *mp;
1253 struct statfs *sp;
75ffff0d 1254 char *fullpath, *freepath;
984263bc 1255 int error;
984263bc 1256
fad57d0e 1257 if ((error = nlookup(nd)) != 0)
984263bc 1258 return (error);
28623bf9 1259 mp = nd->nl_nch.mount;
984263bc 1260 sp = &mp->mnt_stat;
acde96db 1261 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
984263bc 1262 return (error);
75ffff0d 1263
d3c546e6 1264 error = mount_path(p, mp, &fullpath, &freepath);
75ffff0d
JS
1265 if (error)
1266 return(error);
1267 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1268 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
efda3bd0 1269 kfree(freepath, M_TEMP);
75ffff0d 1270
984263bc 1271 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9697c509
DRJ
1272 bcopy(sp, buf, sizeof(*buf));
1273 /* Only root should have access to the fsid's. */
895c1f85 1274 if (priv_check(td, PRIV_ROOT))
9697c509
DRJ
1275 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1276 return (0);
984263bc
MD
1277}
1278
1279/*
9697c509 1280 * statfs_args(char *path, struct statfs *buf)
41c20dac 1281 *
984263bc
MD
1282 * Get filesystem statistics.
1283 */
984263bc 1284int
753fd850 1285sys_statfs(struct statfs_args *uap)
9697c509 1286{
fad57d0e 1287 struct nlookupdata nd;
9697c509
DRJ
1288 struct statfs buf;
1289 int error;
1290
fad57d0e
MD
1291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1292 if (error == 0)
1293 error = kern_statfs(&nd, &buf);
1294 nlookup_done(&nd);
9697c509
DRJ
1295 if (error == 0)
1296 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1297 return (error);
1298}
1299
1300int
1301kern_fstatfs(int fd, struct statfs *buf)
984263bc 1302{
dadab5e9
MD
1303 struct thread *td = curthread;
1304 struct proc *p = td->td_proc;
984263bc
MD
1305 struct file *fp;
1306 struct mount *mp;
1fd87d54 1307 struct statfs *sp;
75ffff0d 1308 char *fullpath, *freepath;
984263bc 1309 int error;
984263bc 1310
dadab5e9 1311 KKASSERT(p);
35949930 1312 if ((error = holdvnode(td, fd, &fp)) != 0)
984263bc 1313 return (error);
92734e3d
MD
1314
1315 /*
1316 * Try to use mount info from any overlays rather than the
1317 * mount info for the underlying vnode, otherwise we will
1318 * fail when operating on null-mounted paths inside a chroot.
1319 */
1320 if ((mp = fp->f_nchandle.mount) == NULL)
1321 mp = ((struct vnode *)fp->f_data)->v_mount;
5b287bba
MD
1322 if (mp == NULL) {
1323 error = EBADF;
1324 goto done;
1325 }
1326 if (fp->f_cred == NULL) {
1327 error = EINVAL;
1328 goto done;
1329 }
984263bc 1330 sp = &mp->mnt_stat;
5b287bba
MD
1331 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
1332 goto done;
75ffff0d 1333
d3c546e6 1334 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
5b287bba 1335 goto done;
75ffff0d
JS
1336 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1337 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
efda3bd0 1338 kfree(freepath, M_TEMP);
75ffff0d 1339
984263bc 1340 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9697c509 1341 bcopy(sp, buf, sizeof(*buf));
75ffff0d 1342
9697c509 1343 /* Only root should have access to the fsid's. */
895c1f85 1344 if (priv_check(td, PRIV_ROOT))
9697c509 1345 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
5b287bba
MD
1346 error = 0;
1347done:
1348 fdrop(fp);
1349 return (error);
9697c509
DRJ
1350}
1351
1352/*
1353 * fstatfs_args(int fd, struct statfs *buf)
1354 *
1355 * Get filesystem statistics.
1356 */
1357int
753fd850 1358sys_fstatfs(struct fstatfs_args *uap)
9697c509
DRJ
1359{
1360 struct statfs buf;
1361 int error;
1362
1363 error = kern_fstatfs(uap->fd, &buf);
1364
1365 if (error == 0)
1366 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1367 return (error);
984263bc
MD
1368}
1369
d9fad06e
MD
1370int
1371kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
1372{
1373 struct mount *mp;
1374 struct statvfs *sp;
1375 int error;
1376
1377 if ((error = nlookup(nd)) != 0)
1378 return (error);
1379 mp = nd->nl_nch.mount;
1380 sp = &mp->mnt_vstat;
1381 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
1382 return (error);
1383
1384 sp->f_flag = 0;
1385 if (mp->mnt_flag & MNT_RDONLY)
1386 sp->f_flag |= ST_RDONLY;
1387 if (mp->mnt_flag & MNT_NOSUID)
1388 sp->f_flag |= ST_NOSUID;
1389 bcopy(sp, buf, sizeof(*buf));
1390 return (0);
1391}
1392
1393/*
1394 * statfs_args(char *path, struct statfs *buf)
1395 *
1396 * Get filesystem statistics.
1397 */
1398int
1399sys_statvfs(struct statvfs_args *uap)
1400{
1401 struct nlookupdata nd;
1402 struct statvfs buf;
1403 int error;
1404
1405 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1406 if (error == 0)
1407 error = kern_statvfs(&nd, &buf);
1408 nlookup_done(&nd);
1409 if (error == 0)
1410 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1411 return (error);
1412}
1413
1414int
1415kern_fstatvfs(int fd, struct statvfs *buf)
1416{
1417 struct thread *td = curthread;
d9fad06e
MD
1418 struct file *fp;
1419 struct mount *mp;
1420 struct statvfs *sp;
1421 int error;
1422
35949930 1423 if ((error = holdvnode(td, fd, &fp)) != 0)
d9fad06e 1424 return (error);
92734e3d
MD
1425 if ((mp = fp->f_nchandle.mount) == NULL)
1426 mp = ((struct vnode *)fp->f_data)->v_mount;
d9fad06e
MD
1427 if (mp == NULL) {
1428 error = EBADF;
1429 goto done;
1430 }
1431 if (fp->f_cred == NULL) {
1432 error = EINVAL;
1433 goto done;
1434 }
1435 sp = &mp->mnt_vstat;
1436 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
1437 goto done;
1438
1439 sp->f_flag = 0;
1440 if (mp->mnt_flag & MNT_RDONLY)
1441 sp->f_flag |= ST_RDONLY;
1442 if (mp->mnt_flag & MNT_NOSUID)
1443 sp->f_flag |= ST_NOSUID;
1444
1445 bcopy(sp, buf, sizeof(*buf));
1446 error = 0;
1447done:
1448 fdrop(fp);
1449 return (error);
1450}
1451
1452/*
1453 * fstatfs_args(int fd, struct statfs *buf)
1454 *
1455 * Get filesystem statistics.
1456 */
1457int
1458sys_fstatvfs(struct fstatvfs_args *uap)
1459{
1460 struct statvfs buf;
1461 int error;
1462
1463 error = kern_fstatvfs(uap->fd, &buf);
1464
1465 if (error == 0)
1466 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1467 return (error);
1468}
1469
984263bc 1470/*
41c20dac
MD
1471 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
1472 *
984263bc
MD
1473 * Get statistics on all filesystems.
1474 */
861905fb
MD
1475
/*
 * Scan state shared between sys_getfsstat() and getfsstat_callback().
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user-space slot to fill, or NULL */
	long count;		/* mounts counted so far */
	long maxcount;		/* number of slots in the user buffer */
	int error;		/* first error recorded by the callback */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;	/* calling thread */
};
1484
1485static int getfsstat_callback(struct mount *, void *);
1486
984263bc 1487int
753fd850 1488sys_getfsstat(struct getfsstat_args *uap)
984263bc 1489{
dadab5e9 1490 struct thread *td = curthread;
861905fb 1491 struct getfsstat_info info;
75ffff0d 1492
861905fb 1493 bzero(&info, sizeof(info));
861905fb
MD
1494
1495 info.maxcount = uap->bufsize / sizeof(struct statfs);
1496 info.sfsp = uap->buf;
1497 info.count = 0;
1498 info.flags = uap->flags;
9910d07b 1499 info.td = td;
861905fb
MD
1500
1501 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
1502 if (info.sfsp && info.count > info.maxcount)
1503 uap->sysmsg_result = info.maxcount;
75ffff0d 1504 else
861905fb
MD
1505 uap->sysmsg_result = info.count;
1506 return (info.error);
1507}
1508
1509static int
1510getfsstat_callback(struct mount *mp, void *data)
1511{
1512 struct getfsstat_info *info = data;
1513 struct statfs *sp;
1514 char *freepath;
1515 char *fullpath;
1516 int error;
1517
5c6a03ba
MD
1518 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1519 return(0);
1520
861905fb 1521 if (info->sfsp && info->count < info->maxcount) {
861905fb
MD
1522 sp = &mp->mnt_stat;
1523
1524 /*
1525 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1526 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1527 * overrides MNT_WAIT.
1528 */
1529 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1530 (info->flags & MNT_WAIT)) &&
9910d07b 1531 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
861905fb
MD
1532 return(0);
1533 }
1534 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1535
9910d07b 1536 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
861905fb
MD
1537 if (error) {
1538 info->error = error;
1539 return(-1);
984263bc 1540 }
861905fb
MD
1541 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1542 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
efda3bd0 1543 kfree(freepath, M_TEMP);
861905fb
MD
1544
1545 error = copyout(sp, info->sfsp, sizeof(*sp));
1546 if (error) {
1547 info->error = error;
1548 return (-1);
984263bc 1549 }
861905fb 1550 ++info->sfsp;
984263bc 1551 }
861905fb
MD
1552 info->count++;
1553 return(0);
984263bc
MD
1554}
1555
00fe9d48
MD
1556/*
1557 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
1558 long vbufsize, int flags)
1559 *
1560 * Get statistics on all filesystems.
1561 */
1562
/*
 * Scan state shared between sys_getvfsstat() and getvfsstat_callback().
 */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next user-space statfs slot */
	struct statvfs *vsfsp;	/* next user-space statvfs slot, or NULL */
	long count;		/* mounts counted so far */
	long maxcount;		/* number of statvfs slots available */
	int error;		/* first error recorded by the callback */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;	/* calling thread */
};
1572
1573static int getvfsstat_callback(struct mount *, void *);
1574
00fe9d48
MD
1575int
1576sys_getvfsstat(struct getvfsstat_args *uap)
1577{
1578 struct thread *td = curthread;
00fe9d48
MD
1579 struct getvfsstat_info info;
1580
1581 bzero(&info, sizeof(info));
1582
1583 info.maxcount = uap->vbufsize / sizeof(struct statvfs);
1584 info.sfsp = uap->buf;
1585 info.vsfsp = uap->vbuf;
1586 info.count = 0;
1587 info.flags = uap->flags;
9910d07b 1588 info.td = td;
00fe9d48
MD
1589
1590 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
1591 if (info.vsfsp && info.count > info.maxcount)
1592 uap->sysmsg_result = info.maxcount;
1593 else
1594 uap->sysmsg_result = info.count;
1595 return (info.error);
1596}
1597
1598static int
1599getvfsstat_callback(struct mount *mp, void *data)
1600{
1601 struct getvfsstat_info *info = data;
1602 struct statfs *sp;
1603 struct statvfs *vsp;
1604 char *freepath;
1605 char *fullpath;
1606 int error;
1607
5c6a03ba
MD
1608 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1609 return(0);
1610
00fe9d48 1611 if (info->vsfsp && info->count < info->maxcount) {
00fe9d48
MD
1612 sp = &mp->mnt_stat;
1613 vsp = &mp->mnt_vstat;
1614
1615 /*
1616 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1617 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1618 * overrides MNT_WAIT.
1619 */
1620 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1621 (info->flags & MNT_WAIT)) &&
9910d07b 1622 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
00fe9d48
MD
1623 return(0);
1624 }
1625 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1626
1627 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1628 (info->flags & MNT_WAIT)) &&
9910d07b 1629 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
00fe9d48
MD
1630 return(0);
1631 }
1632 vsp->f_flag = 0;
1633 if (mp->mnt_flag & MNT_RDONLY)
1634 vsp->f_flag |= ST_RDONLY;
1635 if (mp->mnt_flag & MNT_NOSUID)
1636 vsp->f_flag |= ST_NOSUID;
1637
9910d07b 1638 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
00fe9d48
MD
1639 if (error) {
1640 info->error = error;
1641 return(-1);
1642 }
1643 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1644 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1645 kfree(freepath, M_TEMP);
1646
1647 error = copyout(sp, info->sfsp, sizeof(*sp));
1648 if (error == 0)
1649 error = copyout(vsp, info->vsfsp, sizeof(*vsp));
1650 if (error) {
1651 info->error = error;
1652 return (-1);
1653 }
1654 ++info->sfsp;
1655 ++info->vsfsp;
1656 }
1657 info->count++;
1658 return(0);
1659}
1660
1661
984263bc 1662/*
41c20dac
MD
1663 * fchdir_args(int fd)
1664 *
984263bc
MD
1665 * Change current working directory to a given file descriptor.
1666 */
984263bc 1667int
753fd850 1668sys_fchdir(struct fchdir_args *uap)
984263bc 1669{
dadab5e9
MD
1670 struct thread *td = curthread;
1671 struct proc *p = td->td_proc;
41c20dac 1672 struct filedesc *fdp = p->p_fd;
21739618 1673 struct vnode *vp, *ovp;
984263bc
MD
1674 struct mount *mp;
1675 struct file *fp;
28623bf9 1676 struct nchandle nch, onch, tnch;
984263bc
MD
1677 int error;
1678
35949930 1679 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
984263bc 1680 return (error);
0ac7b714 1681 lwkt_gettoken(&p->p_token);
984263bc 1682 vp = (struct vnode *)fp->f_data;
597aea93 1683 vref(vp);
12cdc371 1684 vn_lock(vp, LK_SHARED | LK_RETRY);
609c6f34 1685 if (fp->f_nchandle.ncp == NULL)
984263bc
MD
1686 error = ENOTDIR;
1687 else
609c6f34 1688 error = checkvp_chdir(vp, td);
984263bc
MD
1689 if (error) {
1690 vput(vp);
3919ced0 1691 goto done;
984263bc 1692 }
28623bf9 1693 cache_copy(&fp->f_nchandle, &nch);
1d505369
MD
1694
1695 /*
1696 * If the ncp has become a mount point, traverse through
1697 * the mount point.
1698 */
1699
28623bf9
MD
1700 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
1701 (mp = cache_findmount(&nch)) != NULL
1d505369 1702 ) {
28623bf9 1703 error = nlookup_mp(mp, &tnch);
21739618 1704 if (error == 0) {
28623bf9 1705 cache_unlock(&tnch); /* leave ref intact */
21739618 1706 vput(vp);
28623bf9 1707 vp = tnch.ncp->nc_vp;
87de5057 1708 error = vget(vp, LK_SHARED);
21739618 1709 KKASSERT(error == 0);
28623bf9
MD
1710 cache_drop(&nch);
1711 nch = tnch;
21739618 1712 }
6fa06591 1713 cache_dropmount(mp);
21739618
MD
1714 }
1715 if (error == 0) {
cff27bad 1716 spin_lock(&fdp->fd_spin);
21739618 1717 ovp = fdp->fd_cdir;
28623bf9 1718 onch = fdp->fd_ncdir;
21739618 1719 fdp->fd_cdir = vp;
28623bf9 1720 fdp->fd_ncdir = nch;
cff27bad
MD
1721 spin_unlock(&fdp->fd_spin);
1722 vn_unlock(vp); /* leave ref intact */
28623bf9 1723 cache_drop(&onch);
21739618
MD
1724 vrele(ovp);
1725 } else {
28623bf9 1726 cache_drop(&nch);
21739618
MD
1727 vput(vp);
1728 }
5b287bba 1729 fdrop(fp);
3919ced0 1730done:
0ac7b714 1731 lwkt_reltoken(&p->p_token);
21739618 1732 return (error);
984263bc
MD
1733}
1734
9697c509 1735int
21739618 1736kern_chdir(struct nlookupdata *nd)
9697c509
DRJ
1737{
1738 struct thread *td = curthread;
1739 struct proc *p = td->td_proc;
1740 struct filedesc *fdp = p->p_fd;
21739618 1741 struct vnode *vp, *ovp;
28623bf9 1742 struct nchandle onch;
9697c509
DRJ
1743 int error;
1744
12cdc371 1745 nd->nl_flags |= NLC_SHAREDLOCK;
21739618 1746 if ((error = nlookup(nd)) != 0)
9697c509 1747 return (error);
28623bf9 1748 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
21739618 1749 return (ENOENT);
87de5057 1750 if ((error = vget(vp, LK_SHARED)) != 0)
21739618
MD
1751 return (error);
1752
0ac7b714 1753 lwkt_gettoken(&p->p_token);
21739618 1754 error = checkvp_chdir(vp, td);
a11aaa81 1755 vn_unlock(vp);
21739618 1756 if (error == 0) {
cff27bad 1757 spin_lock(&fdp->fd_spin);
21739618 1758 ovp = fdp->fd_cdir;
28623bf9 1759 onch = fdp->fd_ncdir;
28623bf9 1760 fdp->fd_ncdir = nd->nl_nch;
21739618 1761 fdp->fd_cdir = vp;
cff27bad
MD
1762 spin_unlock(&fdp->fd_spin);
1763 cache_unlock(&nd->nl_nch); /* leave reference intact */
28623bf9 1764 cache_drop(&onch);
21739618 1765 vrele(ovp);
28623bf9 1766 cache_zero(&nd->nl_nch);
21739618
MD
1767 } else {
1768 vrele(vp);
e24b948e 1769 }
0ac7b714 1770 lwkt_reltoken(&p->p_token);
e24b948e 1771 return (error);
9697c509
DRJ
1772}
1773
984263bc 1774/*
41c20dac
MD
1775 * chdir_args(char *path)
1776 *
984263bc
MD
1777 * Change current working directory (``.'').
1778 */
984263bc 1779int
753fd850 1780sys_chdir(struct chdir_args *uap)
984263bc 1781{
21739618 1782 struct nlookupdata nd;
9697c509 1783 int error;
984263bc 1784
21739618 1785 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
fad57d0e 1786 if (error == 0)
21739618 1787 error = kern_chdir(&nd);
fad57d0e 1788 nlookup_done(&nd);
9697c509 1789 return (error);
984263bc
MD
1790}
1791
1792/*
1793 * Helper function for raised chroot(2) security function: Refuse if
1794 * any filedescriptors are open directories.
1795 */
1796static int
35949930 1797chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
984263bc
MD
1798{
1799 struct vnode *vp;
1800 struct file *fp;
1801 int error;
1802 int fd;
1803
1804 for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
35949930 1805 if ((error = holdvnode(td, fd, &fp)) != 0)
984263bc
MD
1806 continue;
1807 vp = (struct vnode *)fp->f_data;
5b287bba
MD
1808 if (vp->v_type != VDIR) {
1809 fdrop(fp);
984263bc 1810 continue;
5b287bba
MD
1811 }
1812 fdrop(fp);
984263bc
MD
1813 return(EPERM);
1814 }
1815 return (0);
1816}
1817
1818/*
1819 * This sysctl determines if we will allow a process to chroot(2) if it
1820 * has a directory open:
1821 * 0: disallowed for all processes.
1822 * 1: allowed for processes that were not already chroot(2)'ed.
1823 * 2: allowed for all processes.
1824 */
1825
1826static int chroot_allow_open_directories = 1;
1827
1828SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1829 &chroot_allow_open_directories, 0, "");
1830
e24b948e 1831/*
21739618
MD
1832 * chroot to the specified namecache entry. We obtain the vp from the
1833 * namecache data. The passed ncp must be locked and referenced and will
1834 * remain locked and referenced on return.
e24b948e 1835 */
53dd6631 1836int
28623bf9 1837kern_chroot(struct nchandle *nch)
e24b948e
MD
1838{
1839 struct thread *td = curthread;
1840 struct proc *p = td->td_proc;
1841 struct filedesc *fdp = p->p_fd;
21739618 1842 struct vnode *vp;
e24b948e
MD
1843 int error;
1844
1845 /*
ca3cd02d 1846 * Only privileged user can chroot
e24b948e 1847 */
9910d07b 1848 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
3a591c90 1849 if (error)
e24b948e
MD
1850 return (error);
1851
1852 /*
1853 * Disallow open directory descriptors (fchdir() breakouts).
1854 */
1855 if (chroot_allow_open_directories == 0 ||
1856 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
35949930 1857 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
e24b948e
MD
1858 return (error);
1859 }
28623bf9 1860 if ((vp = nch->ncp->nc_vp) == NULL)
21739618
MD
1861 return (ENOENT);
1862
87de5057 1863 if ((error = vget(vp, LK_SHARED)) != 0)
21739618 1864 return (error);
e24b948e
MD
1865
1866 /*
1867 * Check the validity of vp as a directory to change to and
1868 * associate it with rdir/jdir.
1869 */
21739618 1870 error = checkvp_chdir(vp, td);
a11aaa81 1871 vn_unlock(vp); /* leave reference intact */
21739618 1872 if (error == 0) {
e7e1189f 1873 lwkt_gettoken(&p->p_token);
e24b948e 1874 vrele(fdp->fd_rdir);
21739618 1875 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
28623bf9
MD
1876 cache_drop(&fdp->fd_nrdir);
1877 cache_copy(nch, &fdp->fd_nrdir);
e24b948e
MD
1878 if (fdp->fd_jdir == NULL) {
1879 fdp->fd_jdir = vp;
597aea93 1880 vref(fdp->fd_jdir);
28623bf9 1881 cache_copy(nch, &fdp->fd_njdir);
e24b948e 1882 }
e7e1189f
MD
1883 if ((p->p_flags & P_DIDCHROOT) == 0) {
1884 p->p_flags |= P_DIDCHROOT;
1885 if (p->p_depth <= 65535 - 32)
1886 p->p_depth += 32;
1887 }
1888 lwkt_reltoken(&p->p_token);
21739618
MD
1889 } else {
1890 vrele(vp);
e24b948e
MD
1891 }
1892 return (error);
1893}
1894
984263bc 1895/*
41c20dac
MD
1896 * chroot_args(char *path)
1897 *
984263bc
MD
1898 * Change notion of root (``/'') directory.
1899 */
984263bc 1900int
753fd850 1901sys_chroot(struct chroot_args *uap)
984263bc 1902{
f64b567c 1903 struct thread *td __debugvar = curthread;
21739618 1904 struct nlookupdata nd;
e24b948e 1905 int error;
984263bc 1906
e24b948e 1907 KKASSERT(td->td_proc);
21739618 1908 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3919ced0
MD
1909 if (error == 0) {
1910 nd.nl_flags |= NLC_EXEC;
1911 error = nlookup(&nd);
1912 if (error == 0)
1913 error = kern_chroot(&nd.nl_nch);
53dd6631 1914 }
fad57d0e 1915 nlookup_done(&nd);
53dd6631 1916 return(error);
984263bc
MD
1917}
1918
72f12c5b
AH
1919int
1920sys_chroot_kernel(struct chroot_kernel_args *uap)
1921{
1922 struct thread *td = curthread;
1923 struct nlookupdata nd;
1924 struct nchandle *nch;
1925 struct vnode *vp;
1926 int error;
1927
72f12c5b
AH
1928 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1929 if (error)
1930 goto error_nond;
1931
1932 error = nlookup(&nd);
1933 if (error)
1934 goto error_out;
1935
1936 nch = &nd.nl_nch;
1937
1938 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
1939 if (error)
1940 goto error_out;
1941
1942 if ((vp = nch->ncp->nc_vp) == NULL) {
1943 error = ENOENT;
1944 goto error_out;
1945 }
1946
1947 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
1948 goto error_out;
1949
1950 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
1951 vfs_cache_setroot(vp, cache_hold(nch));
1952
1953error_out:
1954 nlookup_done(&nd);
1955error_nond:
72f12c5b
AH
1956 return(error);
1957}
1958
984263bc 1959/*
e24b948e
MD
1960 * Common routine for chroot and chdir. Given a locked, referenced vnode,
1961 * determine whether it is legal to chdir to the vnode. The vnode's state
1962 * is not changed by this call.
984263bc 1963 */
59b728a7 1964static int
e24b948e 1965checkvp_chdir(struct vnode *vp, struct thread *td)
984263bc 1966{
984263bc
MD
1967 int error;
1968
984263bc
MD
1969 if (vp->v_type != VDIR)
1970 error = ENOTDIR;
1971 else
9910d07b 1972 error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
984263bc
MD
1973 return (error);
1974}
1975
984263bc 1976int
fad57d0e 1977kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
984263bc 1978{
dadab5e9
MD
1979 struct thread *td = curthread;
1980 struct proc *p = td->td_proc;
fbfe4e7d 1981 struct lwp *lp = td->td_lwp;
41c20dac 1982 struct filedesc *fdp = p->p_fd;
9697c509 1983 int cmode, flags;
984263bc 1984 struct file *nfp;
fad57d0e
MD
1985 struct file *fp;
1986 struct vnode *vp;
6e4ea98e 1987 int type, indx, error = 0;
984263bc 1988 struct flock lf;
984263bc 1989
984263bc
MD
1990 if ((oflags & O_ACCMODE) == O_ACCMODE)
1991 return (EINVAL);
1992 flags = FFLAGS(oflags);
f3a2d8c4 1993 error = falloc(lp, &nfp, NULL);
984263bc
MD
1994 if (error)
1995 return (error);
1996 fp = nfp;
3a907475 1997 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
fad57d0e 1998
984263bc 1999 /*
fad57d0e
MD
2000 * XXX p_dupfd is a real mess. It allows a device to return a
2001 * file descriptor to be duplicated rather then doing the open
2002 * itself.
984263bc 2003 */
fbfe4e7d 2004 lp->lwp_dupfd = -1;
21739618
MD
2005
2006 /*
fad57d0e
MD
2007 * Call vn_open() to do the lookup and assign the vnode to the
2008 * file pointer. vn_open() does not change the ref count on fp
2009 * and the vnode, on success, will be inherited by the file pointer
2010 * and unlocked.
12cdc371
MD
2011 *
2012 * Request a shared lock on the vnode if possible.
864231c1
MD
2013 *
2014 * Executable binaries can race VTEXT against O_RDWR opens, so
2015 * use an exclusive lock for O_RDWR opens as well.
2016 *
2017 * NOTE: We need a flag to separate terminal vnode locking from
2018 * parent locking. O_CREAT needs parent locking, but O_TRUNC
2019 * and O_RDWR only need to lock the terminal vnode exclusively.
21739618 2020 */
fad57d0e 2021 nd->nl_flags |= NLC_LOCKVP;
864231c1 2022 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0)
12cdc371
MD
2023 nd->nl_flags |= NLC_SHAREDLOCK;
2024
fad57d0e
MD
2025 error = vn_open(nd, fp, flags, cmode);
2026 nlookup_done(nd);
12cdc371 2027
984263bc 2028 if (error) {
984263bc
MD
2029 /*
2030 * handle special fdopen() case. bleh. dupfdopen() is
2031 * responsible for dropping the old contents of ofiles[indx]
2032 * if it succeeds.
fad57d0e 2033 *
259b8ea0
MD
2034 * Note that fsetfd() will add a ref to fp which represents
2035 * the fd_files[] assignment. We must still drop our
2036 * reference.
984263bc 2037 */
fbfe4e7d 2038 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
259b8ea0 2039 if (fdalloc(p, 0, &indx) == 0) {
35949930 2040 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error);
fad57d0e
MD
2041 if (error == 0) {
2042 *res = indx;
9f87144f 2043 fdrop(fp); /* our ref */
fad57d0e
MD
2044 return (0);
2045 }
f3a2d8c4 2046 fsetfd(fdp, NULL, indx);
fad57d0e 2047 }
984263bc 2048 }
9f87144f 2049 fdrop(fp); /* our ref */
984263bc
MD
2050 if (error == ERESTART)
2051 error = EINTR;
2052 return (error);
2053 }
fad57d0e
MD
2054
2055 /*
2056 * ref the vnode for ourselves so it can't be ripped out from under
2057 * us. XXX need an ND flag to request that the vnode be returned
2058 * anyway.
259b8ea0
MD
2059 *
2060 * Reserve a file descriptor but do not assign it until the open
2061 * succeeds.
fad57d0e
MD
2062 */
2063 vp = (struct vnode *)fp->f_data;
2064 vref(vp);
259b8ea0 2065 if ((error = fdalloc(p, 0, &indx)) != 0) {
9f87144f 2066 fdrop(fp);
fad57d0e
MD
2067 vrele(vp);
2068 return (error);
2069 }
2070
2071 /*
2072 * If no error occurs the vp will have been assigned to the file
2073 * pointer.
2074 */
fbfe4e7d 2075 lp->lwp_dupfd = 0;
984263bc 2076
984263bc
MD
2077 if (flags & (O_EXLOCK | O_SHLOCK)) {
2078 lf.l_whence = SEEK_SET;
2079 lf.l_start = 0;
2080 lf.l_len = 0;
2081 if (flags & O_EXLOCK)
2082 lf.l_type = F_WRLCK;
2083 else
2084 lf.l_type = F_RDLCK;
71c18fe3
MD
2085 if (flags & FNONBLOCK)
2086 type = 0;
2087 else
2088 type = F_WAIT;
fad57d0e 2089
984263bc
MD
2090 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
2091 /*
259b8ea0
MD
2092 * lock request failed. Clean up the reserved
2093 * descriptor.
984263bc 2094 */
fad57d0e 2095 vrele(vp);
f3a2d8c4 2096 fsetfd(fdp, NULL, indx);
9f87144f 2097 fdrop(fp);
984263bc
MD
2098 return (error);
2099 }
9f36effd 2100 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
984263bc 2101 }
7540ab49
MD
2102#if 0
2103 /*
2104 * Assert that all regular file vnodes were created with an object.
2105 */
2106 KASSERT(vp->v_type != VREG || vp->v_object != NULL,
2107 ("open: regular file has no backing object after vn_open"));
2108#endif
984263bc 2109
fad57d0e 2110 vrele(vp);
21739618 2111
984263bc
MD
2112 /*
2113 * release our private reference, leaving the one associated with the
2114 * descriptor table intact.
2115 */
330497ce
MD
2116 if (oflags & O_CLOEXEC)
2117 fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
f3a2d8c4 2118 fsetfd(fdp, fp, indx);
9f87144f 2119 fdrop(fp);
9697c509 2120 *res = indx;
35949930 2121
6e4ea98e 2122 return (error);
984263bc
MD
2123}
2124
984263bc 2125/*
9697c509 2126 * open_args(char *path, int flags, int mode)
41c20dac 2127 *
9697c509
DRJ
2128 * Check permissions, allocate an open file structure,
2129 * and call the device open routine if any.
984263bc 2130 */
984263bc 2131int
753fd850 2132sys_open(struct open_args *uap)
984263bc 2133{
fad57d0e 2134 struct nlookupdata nd;
9697c509
DRJ
2135 int error;
2136
85dbab7f 2137 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
fad57d0e
MD
2138 if (error == 0) {
2139 error = kern_open(&nd, uap->flags,
2140 uap->mode, &uap->sysmsg_result);
2141 }
2142 nlookup_done(&nd);
9697c509 2143 return (error);
984263bc 2144}
984263bc 2145
83a11774
NT
2146/*
2147 * openat_args(int fd, char *path, int flags, int mode)
2148 */
2149int
2150sys_openat(struct openat_args *uap)
2151{
83a11774
NT
2152 struct nlookupdata nd;
2153 int error;
1db695af 2154 struct file *fp;
83a11774 2155
1db695af
NT
2156 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2157 if (error == 0) {
2158 error = kern_open(&nd, uap->flags, uap->mode,
2159 &uap->sysmsg_result);
83a11774 2160 }
1db695af 2161 nlookup_done_at(&nd, fp);
83a11774
NT
2162 return (error);
2163}
2164
b03976ae
SW
2165int
2166kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
2167{
2168 struct thread *td = curthread;
2169 struct proc *p = td->td_proc;
2170 struct vnode *vp;
2171 struct vattr vattr;
2172 int error;
2173 int whiteout = 0;
2174
2175 KKASSERT(p);
2176
2177 VATTR_NULL(&vattr);
2178 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2179 vattr.va_rmajor = rmajor;
2180 vattr.va_rminor = rminor;
2181
2182 switch (mode & S_IFMT) {
2183 case S_IFMT: /* used by badsect to flag bad sectors */
2184 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
2185 vattr.va_type = VBAD;
2186 break;
2187 case S_IFCHR:
2188 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
2189 vattr.va_type = VCHR;
2190 break;
2191 case S_IFBLK:
2192 error = priv_check(td, PRIV_VFS_MKNOD_DEV);
2193 vattr.va_type = VBLK;
2194 break;
2195 case S_IFWHT:
2196 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
2197 whiteout = 1;
2198 break;
2199 case S_IFDIR: /* special directories support for HAMMER */
2200 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
2201 vattr.va_type = VDIR;
2202 break;
2203 default:
2204 error = EINVAL;
2205 break;
2206 }
2207
2208 if (error)
2209 return (error);
2210
2211 bwillinode(1);
2212 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2213 if ((error = nlookup(nd)) != 0)
2214 return (error);
2215 if (nd->nl_nch.ncp->nc_vp)
2216 return (EEXIST);
2217 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2218 return (error);
2219
2220 if (whiteout) {
2221 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
2222 nd->nl_cred, NAMEI_CREATE);
2223 } else {
2224 vp = NULL;
2225 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
2226 &vp, nd->nl_cred, &vattr);
2227 if (error == 0)
2228 vput(vp);
2229 }
2230 return (error);
2231}
2232
9697c509
DRJ
2233/*
2234 * mknod_args(char *path, int mode, int dev)
2235 *
2236 * Create a special file.
2237 */
2238int
753fd850 2239sys_mknod(struct mknod_args *uap)
9697c509 2240{
fad57d0e 2241 struct nlookupdata nd;
9697c509
DRJ
2242 int error;
2243
b03976ae
SW
2244 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2245 if (error == 0) {
2246 error = kern_mknod(&nd, uap->mode,
2247 umajor(uap->dev), uminor(uap->dev));
0e9b9130 2248 }
b03976ae 2249 nlookup_done(&nd);
984263bc
MD
2250 return (error);
2251}
2252
6d98f93c
SW
2253/*
2254 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
2255 *
2256 * Create a special file. The path is relative to the directory associated
2257 * with fd.
6d98f93c
SW
2258 */
2259int
2260sys_mknodat(struct mknodat_args *uap)
2261{
2262 struct nlookupdata nd;
2263 struct file *fp;
2264 int error;
2265
b03976ae
SW
2266 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2267 if (error == 0) {
2268 error = kern_mknod(&nd, uap->mode,
2269 umajor(uap->dev), uminor(uap->dev));
6d98f93c 2270 }
b03976ae 2271 nlookup_done_at(&nd, fp);
6d98f93c
SW
2272 return (error);
2273}
2274
984263bc 2275int
fad57d0e 2276kern_mkfifo(struct nlookupdata *nd, int mode)
984263bc 2277{
dadab5e9
MD
2278 struct thread *td = curthread;
2279 struct proc *p = td->td_proc;
984263bc 2280 struct vattr vattr;
fad57d0e 2281 struct vnode *vp;
984263bc 2282 int error;
984263bc 2283
c4df9635 2284 bwillinode(1);
fad57d0e 2285
5312fa43 2286 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
fad57d0e 2287 if ((error = nlookup(nd)) != 0)
984263bc 2288 return (error);
28623bf9 2289 if (nd->nl_nch.ncp->nc_vp)
984263bc 2290 return (EEXIST);
28623bf9 2291 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
72310cfb 2292 return (error);
fad57d0e 2293
984263bc
MD
2294 VATTR_NULL(&vattr);
2295 vattr.va_type = VFIFO;
136178b3 2296 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
fad57d0e 2297 vp = NULL;
5312fa43 2298 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
984263bc 2299 if (error == 0)
fad57d0e 2300 vput(vp);
136178b3
DRJ
2301 return (error);
2302}
2303
2304/*
2305 * mkfifo_args(char *path, int mode)
2306 *
2307 * Create a named pipe.
2308 */
2309int
753fd850 2310sys_mkfifo(struct mkfifo_args *uap)
136178b3 2311{
fad57d0e 2312 struct nlookupdata nd;
136178b3
DRJ
2313 int error;
2314
fad57d0e
MD
2315 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2316 if (error == 0)
2317 error = kern_mkfifo(&nd, uap->mode);
2318 nlookup_done(&nd);
984263bc
MD
2319 return (error);
2320}
2321
6d98f93c
SW
2322/*
2323 * mkfifoat_args(int fd, char *path, mode_t mode)
2324 *
2325 * Create a named pipe. The path is relative to the directory associated
2326 * with fd.
6d98f93c
SW
2327 */
2328int
2329sys_mkfifoat(struct mkfifoat_args *uap)
2330{
2331 struct nlookupdata nd;
2332 struct file *fp;
2333 int error;
2334
6d98f93c
SW
2335 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2336 if (error == 0)
2337 error = kern_mkfifo(&nd, uap->mode);
2338 nlookup_done_at(&nd, fp);
6d98f93c
SW
2339 return (error);
2340}
2341
8c40f81e 2342static int hardlink_check_uid = 0;
43a0f7ae 2343SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
8c40f81e
MD
2344 &hardlink_check_uid, 0,
2345 "Unprivileged processes cannot create hard links to files owned by other "
2346 "users");
2347static int hardlink_check_gid = 0;
43a0f7ae 2348SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
8c40f81e
MD
2349 &hardlink_check_gid, 0,
2350 "Unprivileged processes cannot create hard links to files owned by other "
2351 "groups");
2352
2353static int
2354can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
2355{
2356 struct vattr va;
2357 int error;
2358
2359 /*
2360 * Shortcut if disabled
2361 */
2362 if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
2363 return (0);
2364
2365 /*
f6df0641 2366 * Privileged user can always hardlink
8c40f81e 2367 */
f6df0641 2368 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
8c40f81e
MD
2369 return (0);
2370
2371 /*
2372 * Otherwise only if the originating file is owned by the
2373 * same user or group. Note that any group is allowed if
2374 * the file is owned by the caller.
2375 */
87de5057 2376 error = VOP_GETATTR(vp, &va);
8c40f81e
MD
2377 if (error != 0)
2378 return (error);
2379
2380 if (hardlink_check_uid) {
2381 if (cred->cr_uid != va.va_uid)
2382 return (EPERM);
2383 }
2384
2385 if (hardlink_check_gid) {
2386 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
2387 return (EPERM);
2388 }
2389
2390 return (0);
2391}
2392
984263bc 2393int
fad57d0e 2394kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
984263bc 2395{
dadab5e9 2396 struct thread *td = curthread;
41c20dac 2397 struct vnode *vp;
984263bc
MD
2398 int error;
2399
fad57d0e
MD
2400 /*
2401 * Lookup the source and obtained a locked vnode.
2402 *
3a907475
MD
2403 * You may only hardlink a file which you have write permission
2404 * on or which you own.
2405 *
fad57d0e
MD
2406 * XXX relookup on vget failure / race ?
2407 */
c4df9635 2408 bwillinode(1);
3a907475 2409 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
fad57d0e 2410 if ((error = nlookup(nd)) != 0)
984263bc 2411 return (error);
28623bf9 2412 vp = nd->nl_nch.ncp->nc_vp;
fad57d0e 2413 KKASSERT(vp != NULL);
984263bc 2414 if (vp->v_type == VDIR)
fad57d0e 2415 return (EPERM); /* POSIX */
28623bf9 2416 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
72310cfb 2417 return (error);
87de5057 2418 if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
fad57d0e
MD
2419 return (error);
2420
2421 /*
2422 * Unlock the source so we can lookup the target without deadlocking
2423 * (XXX vp is locked already, possible other deadlock?). The target
2424 * must not exist.
2425 */
2426 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
2427 nd->nl_flags &= ~NLC_NCPISLOCKED;
28623bf9 2428 cache_unlock(&nd->nl_nch);
1cd65690 2429 vn_unlock(vp);
fad57d0e 2430
5312fa43 2431 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
fad57d0e 2432 if ((error = nlookup(linknd)) != 0) {
1cd65690 2433 vrele(vp);
fad57d0e 2434 return (error);
984263bc 2435 }
28623bf9 2436 if (linknd->nl_nch.ncp->nc_vp) {
1cd65690 2437 vrele(vp);
fad57d0e
MD
2438 return (EEXIST);
2439 }
1c222faf 2440 VFS_MODIFYING(vp->v_mount);
b458d1ab
MD
2441 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
2442 if (error) {
1cd65690
MD
2443 vrele(vp);
2444 return (error);
2445 }
fad57d0e
MD
2446
2447 /*
2448 * Finally run the new API VOP.
2449 */
9910d07b 2450 error = can_hardlink(vp, td, td->td_ucred);
5312fa43
MD
2451 if (error == 0) {
2452 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
2453 vp, linknd->nl_cred);
2454 }
fad57d0e 2455 vput(vp);
984263bc
MD
2456 return (error);
2457}
2458
2459/*
9697c509 2460 * link_args(char *path, char *link)
41c20dac 2461 *
9697c509 2462 * Make a hard file link.
984263bc 2463 */
984263bc 2464int
753fd850 2465sys_link(struct link_args *uap)
9697c509 2466{
fad57d0e 2467 struct nlookupdata nd, linknd;
9697c509
DRJ
2468 int error;
2469
fad57d0e
MD
2470 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2471 if (error == 0) {
2472 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
2473 if (error == 0)
2474 error = kern_link(&nd, &linknd);
2475 nlookup_done(&linknd);
2476 }
2477 nlookup_done(&nd);
9697c509
DRJ
2478 return (error);
2479}
2480
77e32d4c
AH
2481/*
2482 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
2483 *
2484 * Make a hard file link. The path1 argument is relative to the directory
2485 * associated with fd1, and similarly the path2 argument is relative to
2486 * the directory associated with fd2.
2487 */
2488int
2489sys_linkat(struct linkat_args *uap)
2490{
2491 struct nlookupdata nd, linknd;
2492 struct file *fp1, *fp2;
2493 int error;
2494
2495 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
2496 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
2497 if (error == 0) {
2498 error = nlookup_init_at(&linknd, &fp2, uap->fd2,
2499 uap->path2, UIO_USERSPACE, 0);
2500 if (error == 0)
2501 error = kern_link(&nd, &linknd);
2502 nlookup_done_at(&linknd, fp2);
2503 }
2504 nlookup_done_at(&nd, fp1);
2505 return (error);
2506}
2507
9697c509 2508int
fad57d0e 2509kern_symlink(struct nlookupdata *nd, char *path, int mode)
984263bc
MD
2510{
2511 struct vattr vattr;
fad57d0e 2512 struct vnode *vp;
dff430ab 2513 struct vnode *dvp;
984263bc 2514 int error;
984263bc 2515
c4df9635 2516 bwillinode(1);
5312fa43 2517 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
fad57d0e 2518 if ((error = nlookup(nd)) != 0)
9697c509 2519 return (error);
28623bf9 2520 if (nd->nl_nch.ncp->nc_vp)
9697c509 2521 return (EEXIST);
28623bf9 2522 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
72310cfb 2523 return (error);
5312fa43 2524 dvp = nd->nl_dvp;
984263bc 2525 VATTR_NULL(&vattr);
fad57d0e 2526 vattr.va_mode = mode;
dff430ab 2527 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
984263bc 2528 if (error == 0)
fad57d0e 2529 vput(vp);
9697c509
DRJ
2530 return (error);
2531}
2532
2533/*
2534 * symlink(char *path, char *link)
2535 *
2536 * Make a symbolic link.
2537 */
2538int
753fd850 2539sys_symlink(struct symlink_args *uap)
9697c509
DRJ
2540{
2541 struct thread *td = curthread;
fad57d0e 2542 struct nlookupdata nd;
9697c509
DRJ
2543 char *path;
2544 int error;
fad57d0e 2545 int mode;
9697c509 2546
70aac194 2547 path = objcache_get(namei_oc, M_WAITOK);
9697c509 2548 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
e24b948e 2549 if (error == 0) {
fad57d0e
MD
2550 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
2551 if (error == 0) {
2552 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2553 error = kern_symlink(&nd, path, mode);
2554 }
2555 nlookup_done(&nd);
e24b948e 2556 }
70aac194 2557 objcache_put(namei_oc, path);
984263bc
MD
2558 return (error);
2559}
2560
6d98f93c
SW
2561/*
2562 * symlinkat_args(char *path1, int fd, char *path2)
2563 *
2564 * Make a symbolic link. The path2 argument is relative to the directory
2565 * associated with fd.
6d98f93c
SW
2566 */
2567int
2568sys_symlinkat(struct symlinkat_args *uap)
2569{
2570 struct thread *td = curthread;
2571 struct nlookupdata nd;
2572 struct file *fp;
2573 char *path1;
2574 int error;
2575 int mode;
2576
2577 path1 = objcache_get(namei_oc, M_WAITOK);
2578 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
2579 if (error == 0) {
6d98f93c
SW
2580 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
2581 UIO_USERSPACE, 0);
2582 if (error == 0) {
2583 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2584 error = kern_symlink(&nd, path1, mode);
2585 }
2586 nlookup_done_at(&nd, fp);
6d98f93c
SW
2587 }
2588 objcache_put(namei_oc, path1);
2589 return (error);
2590}
2591
984263bc 2592/*
41c20dac
MD
2593 * undelete_args(char *path)
2594 *
984263bc
MD
2595 * Delete a whiteout from the filesystem.
2596 */
984263bc 2597int
753fd850 2598sys_undelete(struct undelete_args *uap)
984263bc 2599{
fad57d0e 2600 struct nlookupdata nd;
984263bc 2601 int error;
984263bc 2602
ab2eb4eb 2603 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
c4df9635 2604 bwillinode(1);
5312fa43 2605 nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
fad57d0e
MD
2606 if (error == 0)
2607 error = nlookup(&nd);
72310cfb 2608 if (error == 0)
28623bf9 2609 error = ncp_writechk(&nd.nl_nch);
dff430ab 2610 if (error == 0) {
5312fa43
MD
2611 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
2612 NAMEI_DELETE);
dff430ab 2613 }
fad57d0e 2614 nlookup_done(&nd);
984263bc
MD
2615 return (error);
2616}
2617
984263bc 2618int
fad57d0e 2619kern_unlink(struct nlookupdata *nd)
984263bc 2620{
984263bc 2621 int error;
984263bc 2622
c4df9635 2623 bwillinode(1);
5312fa43 2624 nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
fad57d0e 2625 if ((error = nlookup(nd)) != 0)
984263bc 2626 return (error);
28623bf9 2627 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
72310cfb 2628 return (error);
5312fa43 2629 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
984263bc
MD
2630 return (error);
2631}
2632
2633/*
9697c509 2634 * unlink_args(char *path)
41c20dac 2635 *
9697c509 2636 * Delete a name from the filesystem.
984263bc 2637 */
984263bc 2638int
753fd850 2639sys_unlink(struct unlink_args *uap)
9697c509 2640{
fad57d0e 2641 struct nlookupdata nd;
9697c509
DRJ
2642 int error;
2643
fad57d0e
MD
2644 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2645 if (error == 0)
2646 error = kern_unlink(&nd);
2647 nlookup_done(&nd);
9697c509
DRJ
2648 return (error);
2649}
2650
3a6d8629
NT
2651
2652/*
2653 * unlinkat_args(int fd, char *path, int flags)
2654 *
2655 * Delete the file or directory entry pointed to by fd/path.
2656 */
2657int
2658sys_unlinkat(struct unlinkat_args *uap)
2659{
2660 struct nlookupdata nd;
2661 struct file *fp;
2662 int error;
2663
2664 if (uap->flags & ~AT_REMOVEDIR)
2665 return (EINVAL);
2666
2667 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2668 if (error == 0) {
2669 if (uap->flags & AT_REMOVEDIR)
2670 error = kern_rmdir(&nd);
2671 else
2672 error = kern_unlink(&nd);
2673 }
2674 nlookup_done_at(&nd, fp);
2675 return (error);
2676}
2677
9697c509 2678int
5a3fe67d 2679kern_lseek(int fd, off_t offset, int whence, off_t *res)
984263bc 2680{
dadab5e9 2681 struct thread *td = curthread;
41c20dac 2682 struct file *fp;
116edfbf 2683 struct vnode *vp;
984263bc 2684 struct vattr vattr;
116edfbf 2685 off_t new_offset;
984263bc
MD
2686 int error;
2687
35949930 2688 fp = holdfp(td, fd, -1);
228b401d 2689 if (fp == NULL)
984263bc 2690 return (EBADF);
228b401d
MD
2691 if (fp->f_type != DTYPE_VNODE) {
2692 error = ESPIPE;
2693 goto done;
2694 }
116edfbf 2695 vp = (struct vnode *)fp->f_data;
228b401d 2696
9697c509 2697 switch (whence) {
984263bc 2698 case L_INCR:
287a8577 2699 spin_lock(&fp->f_spin);
116edfbf 2700 new_offset = fp->f_offset + offset;
228b401d 2701 error = 0;
984263bc
MD
2702 break;
2703 case L_XTND:
116edfbf 2704 error = VOP_GETATTR(vp, &vattr);
287a8577 2705 spin_lock(&fp->f_spin);
116edfbf 2706 new_offset = offset + vattr.va_size;
984263bc
MD
2707 break;
2708 case L_SET:
116edfbf 2709 new_offset = offset;
228b401d 2710 error = 0;
287a8577 2711 spin_lock(&fp->f_spin);
984263bc
MD
2712 break;
2713 default:
116edfbf 2714 new_offset = 0;
228b401d 2715 error = EINVAL;
287a8577 2716 spin_lock(&fp->f_spin);
228b401d 2717 break;
984263bc 2718 }
116edfbf
MD
2719
2720 /*
2721 * Validate the seek position. Negative offsets are not allowed
58675cb0
MD
2722 * for regular files or directories.
2723 *
2724 * Normally we would also not want to allow negative offsets for
2725 * character and block-special devices. However kvm addresses
2726 * on 64 bit architectures might appear to be negative and must
2727 * be allowed.
116edfbf
MD
2728 */
2729 if (error == 0) {
2730 if (new_offset < 0 &&
58675cb0 2731 (vp->v_type == VREG || vp->v_type == VDIR)) {
116edfbf 2732 error = EINVAL;
b06b3b87
MD
2733 } else {
2734 fp->f_offset = new_offset;
116edfbf 2735 }
116edfbf 2736 }
9697c509 2737 *res = fp->f_offset;
287a8577 2738 spin_unlock(&fp->f_spin);
228b401d 2739done:
35949930
MD
2740 dropfp(td, fd, fp);
2741
228b401d 2742 return (error);
984263bc
MD
2743}
2744
984263bc 2745/*
9697c509 2746 * lseek_args(int fd, int pad, off_t offset, int whence)
41c20dac 2747 *
9697c509 2748 * Reposition read/write file offset.
984263bc 2749 */
984263bc 2750int
753fd850 2751sys_lseek(struct lseek_args *uap)
984263bc 2752{
984263bc
MD
2753 int error;
2754
9697c509 2755 error = kern_lseek(uap->fd, uap->offset, uap->whence,
116edfbf 2756 &uap->sysmsg_offset);
9697c509 2757
984263bc
MD
2758 return (error);
2759}
984263bc 2760
18cf460b
NT
2761/*
2762 * Check if current process can access given file. amode is a bitmask of *_OK
2763 * access bits. flags is a bitmask of AT_* flags.
2764 */
984263bc 2765int
18cf460b 2766kern_access(struct nlookupdata *nd, int amode, int flags)
984263bc 2767{
41c20dac 2768 struct vnode *vp;
18cf460b 2769 int error, mode;
984263bc 2770
18cf460b
NT
2771 if (flags & ~AT_EACCESS)
2772 return (EINVAL);
12cdc371 2773 nd->nl_flags |= NLC_SHAREDLOCK;
fad57d0e
MD
2774 if ((error = nlookup(nd)) != 0)
2775 return (error);
806dcf9a 2776retry:
12cdc371 2777 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
9697c509 2778 if (error)
fad57d0e 2779 return (error);
984263bc
MD
2780
2781 /* Flags == 0 means only check for existence. */
18cf460b
NT
2782 if (amode) {
2783 mode = 0;
2784 if (amode & R_OK)
2785 mode |= VREAD;
2786 if (amode & W_OK)
2787 mode |= VWRITE;
2788 if (amode & X_OK)
2789 mode |= VEXEC;
2790 if ((mode & VWRITE) == 0 ||
28623bf9 2791 (error = vn_writechk(vp, &nd->nl_nch)) == 0)
18cf460b 2792 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
806dcf9a
MD
2793
2794 /*
2795 * If the file handle is stale we have to re-resolve the
27e6b4d5
MD
2796 * entry with the ncp held exclusively. This is a hack
2797 * at the moment.
806dcf9a
MD
2798 */
2799 if (error == ESTALE) {
5e1a76ea 2800 vput(vp);
27e6b4d5
MD
2801 cache_unlock(&nd->nl_nch);
2802 cache_lock(&nd->nl_nch);
28623bf9
MD
2803 cache_setunresolved(&nd->nl_nch);
2804 error = cache_resolve(&nd->nl_nch, nd->nl_cred);
806dcf9a 2805 if (error == 0) {
806dcf9a
MD
2806 vp = NULL;
2807 goto retry;
2808 }
5e1a76ea 2809 return(error);
806dcf9a 2810 }
984263bc 2811 }
984263bc 2812 vput(vp);
984263bc
MD
2813 return (error);
2814}
2815
984263bc 2816/*
9697c509 2817 * access_args(char *path, int flags)
41c20dac 2818 *
9697c509 2819 * Check access permissions.
984263bc 2820 */
984263bc 2821int
753fd850 2822sys_access(struct access_args *uap)
984263bc 2823{
fad57d0e 2824 struct nlookupdata nd;
9697c509 2825 int error;
984263bc 2826
fad57d0e
MD
2827 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2828 if (error == 0)
18cf460b 2829 error = kern_access(&nd, uap->flags, 0);
fad57d0e 2830 nlookup_done(&nd);
984263bc
MD
2831 return (error);
2832}
2833
18cf460b 2834
6eb83deb
JM
2835/*
2836 * eaccess_args(char *path, int flags)
2837 *
2838 * Check access permissions.
2839 */
2840int
2841sys_eaccess(struct eaccess_args *uap)
2842{
2843 struct nlookupdata nd;
2844 int error;
2845
2846 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2847 if (error == 0)
2848 error = kern_access(&nd, uap->flags, AT_EACCESS);
2849 nlookup_done(&nd);
2850 return (error);
2851}
2852
2853
18cf460b
NT
2854/*
2855 * faccessat_args(int fd, char *path, int amode, int flags)
2856 *
2857 * Check access permissions.
2858 */
2859int
2860sys_faccessat(struct faccessat_args *uap)
2861{
2862 struct nlookupdata nd;
2863 struct file *fp;
2864 int error;
2865
2866 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
2867 NLC_FOLLOW);
2868 if (error == 0)
2869 error = kern_access(&nd, uap->amode, uap->flags);
2870 nlookup_done_at(&nd, fp);
2871 return (error);
2872}
2873
984263bc 2874int
21739618 2875kern_stat(struct nlookupdata *nd, struct stat *st)
984263bc 2876{
984263bc 2877 int error;
21739618 2878 struct vnode *vp;
984263bc 2879
12cdc371 2880 nd->nl_flags |= NLC_SHAREDLOCK;
21739618 2881 if ((error = nlookup(nd)) != 0)
984263bc 2882 return (error);
806dcf9a 2883again:
28623bf9 2884 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
21739618
MD
2885 return (ENOENT);
2886
87de5057 2887 if ((error = vget(vp, LK_SHARED)) != 0)
21739618 2888 return (error);
87de5057 2889 error = vn_stat(vp, st, nd->nl_cred);
806dcf9a
MD
2890
2891 /*
27e6b4d5
MD
2892 * If the file handle is stale we have to re-resolve the
2893 * entry with the ncp held exclusively. This is a hack
2894 * at the moment.
806dcf9a
MD
2895 */
2896 if (error == ESTALE) {
5dc91765 2897 vput(vp);
27e6b4d5
MD
2898 cache_unlock(&nd->nl_nch);
2899 cache_lock(&nd->nl_nch);
28623bf9
MD
2900 cache_setunresolved(&nd->nl_nch);
2901 error = cache_resolve(&nd->nl_nch, nd->nl_cred);
5dc91765 2902 if (error == 0)
806dcf9a 2903 goto again;
5dc91765
MD
2904 } else {
2905 vput(vp);
806dcf9a 2906 }
984263bc
MD
2907 return (error);
2908}
2909
984263bc 2910/*
41c20dac
MD
2911 * stat_args(char *path, struct stat *ub)
2912 *
984263bc
MD
2913 * Get file status; this version follows links.
2914 */
984263bc 2915int
753fd850 2916sys_stat(struct stat_args *uap)
984263bc 2917{
21739618 2918 struct nlookupdata nd;
9697c509
DRJ
2919 struct stat st;
2920 int error;
984263bc 2921
21739618
MD
2922 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2923 if (error == 0) {
2924 error = kern_stat(&nd, &st);
2925 if (error == 0)
2926 error = copyout(&st, uap->ub, sizeof(*uap->ub));
21739618 2927 }
fad57d0e 2928 nlookup_done(&nd);
984263bc
MD
2929 return (error);
2930}
2931
2932/*
41c20dac
MD
2933 * lstat_args(char *path, struct stat *ub)
2934 *
984263bc
MD
2935 * Get file status; this version does not follow links.
2936 */
984263bc 2937int
753fd850 2938sys_lstat(struct lstat_args *uap)
984263bc 2939{
21739618 2940 struct nlookupdata nd;
9697c509
DRJ
2941 struct stat st;
2942 int error;
984263bc 2943
21739618
MD
2944 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2945 if (error == 0) {
2946 error = kern_stat(&nd, &st);
2947 if (error == 0)
2948 error = copyout(&st, uap->ub, sizeof(*uap->ub));
21739618 2949 }
fad57d0e 2950 nlookup_done(&nd);
984263bc
MD
2951 return (error);
2952}
2953
e101a3f4
NT
2954/*
2955 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
2956 *
2957 * Get status of file pointed to by fd/path.
2958 */
2959int
2960sys_fstatat(struct fstatat_args *uap)
2961{
2962 struct nlookupdata nd;
2963 struct stat st;
2964 int error;
2965 int flags;
2966 struct file *fp;
2967
dbeafe1e 2968 if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
e101a3f4
NT
2969 return (EINVAL);
2970
2971 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
2972
2973 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
2974 UIO_USERSPACE, flags);
2975 if (error == 0) {
2976 error = kern_stat(&nd, &st);
2977 if (error == 0)
2978 error = copyout(&st, uap->sb, sizeof(*uap->sb));
2979 }
2980 nlookup_done_at(&nd, fp);
2981 return (error);
2982}
2983
62d9e552
SW
2984static int
2985kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
984263bc 2986{
fad57d0e
MD
2987 struct nlookupdata nd;
2988 struct vnode *vp;
984263bc 2989 int error;
984263bc 2990
fad57d0e 2991 vp = NULL;
62d9e552 2992 error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
fad57d0e
MD
2993 if (error == 0)
2994 error = nlookup(&nd);
2995 if (error == 0)
28623bf9 2996 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
fad57d0e
MD
2997 nlookup_done(&nd);
2998 if (error == 0) {
62d9e552 2999 error = VOP_PATHCONF(vp, name, sysmsg_regp);
fad57d0e
MD
3000 vput(vp);
3001 }
984263bc
MD
3002 return (error);
3003}
3004
62d9e552
SW
3005/*
3006 * pathconf_Args(char *path, int name)
3007 *
3008 * Get configurable pathname variables.
3009 */
3010int
3011sys_pathconf(struct pathconf_args *uap)
3012{
3013 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
3014 &uap->sysmsg_reg));
3015}
3016
3017/*
3018 * lpathconf_Args(char *path, int name)
3019 *
3020 * Get configurable pathname variables, but don't follow symlinks.
3021 */
3022int
3023sys_lpathconf(struct lpathconf_args *uap)
3024{
3025 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg));
3026}
3027
bbab27f6
DRJ
3028/*
3029 * XXX: daver
3030 * kern_readlink isn't properly split yet. There is a copyin burried
3031 * in VOP_READLINK().
3032 */
984263bc 3033int
fad57d0e 3034kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
984263bc 3035{
dadab5e9 3036 struct thread *td = curthread;
41c20dac 3037 struct vnode *vp;
984263bc
MD
3038 struct iovec aiov;
3039 struct uio auio;
3040 int error;
984263bc 3041
12cdc371 3042 nd->nl_flags |= NLC_SHAREDLOCK;
fad57d0e
MD
3043 if ((error = nlookup(nd)) != 0)
3044 return (error);
12cdc371 3045 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
9697c509 3046 if (error)
984263bc 3047 return (error);
fad57d0e 3048 if (vp->v_type != VLNK) {
984263bc 3049 error = EINVAL;
fad57d0e 3050 } else {
9697c509
DRJ
3051 aiov.iov_base = buf;
3052 aiov.iov_len = count;
984263bc
MD
3053 auio.uio_iov = &aiov;
3054 auio.uio_iovcnt = 1;
3055 auio.uio_offset = 0;
3056 auio.uio_rw = UIO_READ;
3057 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 3058 auio.uio_td = td;
9697c509 3059 auio.uio_resid = count;
9910d07b 3060 error = VOP_READLINK(vp, &auio, td->td_ucred);
984263bc
MD
3061 }
3062 vput(vp);
9697c509
DRJ
3063 *res = count - auio.uio_resid;
3064 return (error);
3065}
3066
3067/*
3068 * readlink_args(char *path, char *buf, int count)
3069 *
3070 * Return target name of a symbolic link.
3071 */
3072int
753fd850 3073sys_readlink(struct readlink_args *uap)
9697c509 3074{
fad57d0e 3075 struct nlookupdata nd;
9697c509
DRJ
3076 int error;
3077
fad57d0e
MD
3078 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3079 if (error == 0) {
3080 error = kern_readlink(&nd, uap->buf, uap->count,
3081 &uap->sysmsg_result);
3082 }
3083 nlookup_done(&nd);
984263bc
MD
3084 return (error);
3085}
3086
6d98f93c
SW
3087/*
3088 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
3089 *
3090 * Return target name of a symbolic link. The path is relative to the
3091 * directory associated with fd.
6d98f93c
SW
3092 */
3093int
3094sys_readlinkat(struct readlinkat_args *uap)
3095{
3096 struct nlookupdata nd;
3097 struct file *fp;
3098 int error;
3099
6d98f93c
SW
3100 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
3101 if (error == 0) {
3102 error = kern_readlink(&nd, uap->buf, uap->bufsize,
3103 &uap->sysmsg_result);
3104 }
3105 nlookup_done_at(&nd, fp);
6d98f93c
SW
3106 return (error);
3107}
3108
984263bc 3109static int
41c20dac 3110setfflags(struct vnode *vp, int flags)
984263bc 3111{
dadab5e9 3112 struct thread *td = curthread;
984263bc
MD
3113 int error;
3114 struct vattr vattr;
3115
3116 /*
3117 * Prevent non-root users from setting flags on devices. When
3118 * a device is reused, users can retain ownership of the device
3119 * if they are allowed to set flags and programs assume that
3120 * chown can't fail when done as root.
3121 */
3122 if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
9910d07b 3123 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
984263bc
MD
3124 return (error);
3125
186fccd6
MD
3126 /*
3127 * note: vget is required for any operation that might mod the vnode
3128 * so VINACTIVE is properly cleared.
3129 */
87de5057 3130 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
186fccd6
MD
3131 VATTR_NULL(&vattr);
3132 vattr.va_flags = flags;
9910d07b 3133 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
186fccd6
MD
3134 vput(vp);
3135 }
984263bc
MD
3136 return (error);
3137}
3138
3139/*
41c20dac
MD
3140 * chflags(char *path, int flags)
3141 *
984263bc
MD
3142 * Change flags of a file given a path name.
3143 */
984263bc 3144int
753fd850 3145sys_chflags(struct chflags_args *uap)
984263bc 3146{
fad57d0e
MD
3147 struct nlookupdata nd;
3148 struct vnode *vp;
984263bc 3149 int error;
984263bc 3150
fad57d0e 3151 vp = NULL;
ab2eb4eb 3152 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
fad57d0e
MD
3153 if (error == 0)
3154 error = nlookup(&nd);
72310cfb 3155 if (error == 0)
28623bf9 3156 error = ncp_writechk(&nd.nl_nch);
fad57d0e 3157 if (error == 0)
28623bf9 3158 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
fad57d0e
MD
3159 nlookup_done(&nd);
3160 if (error == 0) {
ab2eb4eb 3161 error = setfflags(vp, uap->flags);
fad57d0e
MD
3162 vrele(vp);
3163 }
3164 return (error);
984263bc
MD
3165}
3166
91fdc001
PA
3167/*
3168 * lchflags(char *path, int flags)
3169 *
3170 * Change flags of a file given a path name, but don't follow symlinks.
3171 */
91fdc001
PA
3172int
3173sys_lchflags(struct lchflags_args *uap)
3174{
3175 struct nlookupdata nd;
3176 struct vnode *vp;
3177 int error;
3178
3179 vp = NULL;
3180 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
91fdc001
PA
3181 if (error == 0)
3182 error = nlookup(&nd);
3183 if (error == 0)
3184 error = ncp_writechk(&nd.nl_nch);
3185 if (error == 0)
3186 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3187 nlookup_done(&nd);
3188 if (error == 0) {
3189 error = setfflags(vp, uap->flags);
3190 vrele(vp);
3191 }
3192 return (error);
3193}
3194
984263bc 3195/*
41c20dac
MD
3196 * fchflags_args(int fd, int flags)
3197 *
984263bc
MD
3198 * Change flags of a file given a file descriptor.
3199 */
984263bc 3200int
753fd850 3201sys_fchflags(struct fchflags_args *uap)
984263bc 3202{
dadab5e9 3203 struct thread *td = curthread;
984263bc
MD
3204 struct file *fp;
3205 int error;
3206
35949930 3207 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
984263bc 3208 return (error);
28623bf9
MD
3209 if (fp->f_nchandle.ncp)
3210 error = ncp_writechk(&fp->f_nchandle);
72310cfb
MD
3211 if (error == 0)
3212 error = setfflags((struct vnode *) fp->f_data, uap->flags);
5b287bba
MD
3213 fdrop(fp);
3214 return (error);
984263bc
MD
3215}
3216
faac4b79
JG
3217/*
3218 * chflagsat_args(int fd, const char *path, int flags, int atflags)
3219 * change flags given a pathname relative to a filedescriptor
3220 */
3221int sys_chflagsat(struct chflagsat_args *uap)
3222{
3223 struct nlookupdata nd;
3224 struct vnode *vp;
3225 struct file *fp;
3226 int error;
3227 int lookupflags;
3228
3229 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
3230 return (EINVAL);
3231
3232 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3233
3234 vp = NULL;
3235 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
3236 if (error == 0)
3237 error = nlookup(&nd);
3238 if (error == 0)
3239 error = ncp_writechk(&nd.nl_nch);
3240 if (error == 0)
3241 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3242 nlookup_done_at(&nd, fp);
3243 if (error == 0) {
3244 error = setfflags(vp, uap->flags);
3245 vrele(vp);
3246 }
3247 return (error);
3248}
3249
3250
984263bc 3251static int
41c20dac 3252setfmode(struct vnode *vp, int mode)
984263bc 3253{
dadab5e9 3254 struct thread *td = curthread;
984263bc
MD
3255 int error;
3256 struct vattr vattr;
3257
186fccd6
MD
3258 /*
3259 * note: vget is required for any operation that might mod the vnode
3260 * so VINACTIVE is properly cleared.
3261 */
87de5057 3262 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {