MFC: fstest regression fixes - POSIX error codes.
[dragonfly.git] / sys / kern / vfs_nlookup.c
CommitLineData
690a3127
MD
1/*
2 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
c9aef211 34 * $DragonFly: src/sys/kern/vfs_nlookup.c,v 1.25 2008/07/19 04:43:33 dillon Exp $
690a3127
MD
35 */
36/*
37 * nlookup() is the 'new' namei interface. Rather than return directory and
38 * leaf vnodes (in various lock states) the new interface instead deals in
39 * namecache records. Namecache records may represent both a positive or
40 * a negative hit. The namespace is locked via the namecache record instead
41 * of via the vnode, and only the leaf namecache record (representing the
42 * filename) needs to be locked.
43 *
44 * This greatly improves filesystem parallelism and is a huge simplification
45 * of the API versus the old vnode locking / namei scheme.
46 *
47 * Filesystems must actively control the caching aspects of the namecache,
48 * and since namecache pointers are used as handles they are non-optional
49 * even for filesystems which do not generally wish to cache things. It is
50 * intended that a separate cache coherency API will be constructed to handle
51 * these issues.
52 */
53
54#include "opt_ktrace.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/kernel.h>
59#include <sys/vnode.h>
60#include <sys/mount.h>
61#include <sys/filedesc.h>
62#include <sys/proc.h>
63#include <sys/namei.h>
64#include <sys/nlookup.h>
65#include <sys/malloc.h>
21739618 66#include <sys/stat.h>
70aac194 67#include <sys/objcache.h>
690a3127
MD
68
69#ifdef KTRACE
70#include <sys/ktrace.h>
71#endif
72
5dedb299
MD
73static int naccess_va(struct vattr *va, int vmode, struct ucred *cred);
74
690a3127
MD
75/*
76 * Initialize a nlookup() structure, early error return for copyin faults
77 * or a degenerate empty string (which is not allowed).
1f95166e
HP
78 *
79 * The first process proc0's credentials are used if the calling thread
80 * is not associated with a process context.
690a3127
MD
81 */
82int
21739618
MD
83nlookup_init(struct nlookupdata *nd,
84 const char *path, enum uio_seg seg, int flags)
690a3127
MD
85{
86 size_t pathlen;
87 struct proc *p;
88 thread_t td;
89 int error;
90
91 td = curthread;
92 p = td->td_proc;
93
94 /*
95 * note: the pathlen set by copy*str() includes the terminating \0.
96 */
97 bzero(nd, sizeof(struct nlookupdata));
70aac194 98 nd->nl_path = objcache_get(namei_oc, M_WAITOK);
690a3127
MD
99 nd->nl_flags |= NLC_HASBUF;
100 if (seg == UIO_SYSSPACE)
101 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen);
102 else
103 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen);
104
105 /*
106 * Don't allow empty pathnames.
107 * POSIX.1 requirement: "" is not a vaild file name.
108 */
109 if (error == 0 && pathlen <= 1)
110 error = ENOENT;
111
112 if (error == 0) {
21739618 113 if (p && p->p_fd) {
28623bf9
MD
114 cache_copy(&p->p_fd->fd_ncdir, &nd->nl_nch);
115 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch);
116 if (p->p_fd->fd_njdir.ncp)
117 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch);
21739618 118 nd->nl_cred = crhold(p->p_ucred);
690a3127 119 } else {
28623bf9
MD
120 cache_copy(&rootnch, &nd->nl_nch);
121 cache_copy(&nd->nl_nch, &nd->nl_rootnch);
122 cache_copy(&nd->nl_nch, &nd->nl_jailnch);
21739618 123 nd->nl_cred = crhold(proc0.p_ucred);
690a3127
MD
124 }
125 nd->nl_td = td;
690a3127
MD
126 nd->nl_flags |= flags;
127 } else {
128 nlookup_done(nd);
129 }
130 return(error);
131}
132
133/*
fad57d0e 134 * This works similarly to nlookup_init() but does not assume a process
28623bf9 135 * context. rootnch is always chosen for the root directory and the cred
fad57d0e
MD
136 * and starting directory are supplied in arguments.
137 */
138int
139nlookup_init_raw(struct nlookupdata *nd,
140 const char *path, enum uio_seg seg, int flags,
28623bf9 141 struct ucred *cred, struct nchandle *ncstart)
fad57d0e
MD
142{
143 size_t pathlen;
144 thread_t td;
145 int error;
146
147 td = curthread;
148
149 bzero(nd, sizeof(struct nlookupdata));
70aac194 150 nd->nl_path = objcache_get(namei_oc, M_WAITOK);
fad57d0e
MD
151 nd->nl_flags |= NLC_HASBUF;
152 if (seg == UIO_SYSSPACE)
153 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen);
154 else
155 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen);
156
157 /*
158 * Don't allow empty pathnames.
159 * POSIX.1 requirement: "" is not a vaild file name.
160 */
161 if (error == 0 && pathlen <= 1)
162 error = ENOENT;
163
164 if (error == 0) {
28623bf9
MD
165 cache_copy(ncstart, &nd->nl_nch);
166 cache_copy(&rootnch, &nd->nl_rootnch);
167 cache_copy(&rootnch, &nd->nl_jailnch);
fad57d0e
MD
168 nd->nl_cred = crhold(cred);
169 nd->nl_td = td;
170 nd->nl_flags |= flags;
171 } else {
172 nlookup_done(nd);
173 }
174 return(error);
175}
176
177/*
1f95166e
HP
178 * Set a different credential; this credential will be used by future
179 * operations performed on nd.nl_open_vp and nlookupdata structure.
180 */
181void
182nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred)
183{
184 KKASSERT(nd->nl_cred != NULL);
185
186 if (nd->nl_cred != cred) {
187 cred = crhold(cred);
188 crfree(nd->nl_cred);
189 nd->nl_cred = cred;
190 }
191}
192
193/*
21739618
MD
194 * Cleanup a nlookupdata structure after we are through with it. This may
195 * be called on any nlookupdata structure initialized with nlookup_init().
196 * Calling nlookup_done() is mandatory in all cases except where nlookup_init()
197 * returns an error, even if as a consumer you believe you have taken all
198 * dynamic elements out of the nlookupdata structure.
690a3127
MD
199 */
200void
201nlookup_done(struct nlookupdata *nd)
202{
28623bf9 203 if (nd->nl_nch.ncp) {
21739618
MD
204 if (nd->nl_flags & NLC_NCPISLOCKED) {
205 nd->nl_flags &= ~NLC_NCPISLOCKED;
28623bf9 206 cache_unlock(&nd->nl_nch);
21739618 207 }
28623bf9 208 cache_drop(&nd->nl_nch);
690a3127 209 }
28623bf9
MD
210 if (nd->nl_rootnch.ncp)
211 cache_drop(&nd->nl_rootnch);
212 if (nd->nl_jailnch.ncp)
213 cache_drop(&nd->nl_jailnch);
690a3127 214 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) {
70aac194 215 objcache_put(namei_oc, nd->nl_path);
690a3127
MD
216 nd->nl_path = NULL;
217 }
218 if (nd->nl_cred) {
219 crfree(nd->nl_cred);
220 nd->nl_cred = NULL;
221 }
fad57d0e
MD
222 if (nd->nl_open_vp) {
223 if (nd->nl_flags & NLC_LOCKVP) {
a11aaa81 224 vn_unlock(nd->nl_open_vp);
fad57d0e
MD
225 nd->nl_flags &= ~NLC_LOCKVP;
226 }
87de5057 227 vn_close(nd->nl_open_vp, nd->nl_vp_fmode);
fad57d0e
MD
228 nd->nl_open_vp = NULL;
229 }
5312fa43
MD
230 if (nd->nl_dvp) {
231 vrele(nd->nl_dvp);
232 nd->nl_dvp = NULL;
233 }
fad57d0e
MD
234 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */
235}
236
237void
238nlookup_zero(struct nlookupdata *nd)
239{
240 bzero(nd, sizeof(struct nlookupdata));
690a3127
MD
241}
242
243/*
21739618
MD
244 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL
245 * if an error occured.
246 *
247 * Note that the returned ncp is not checked for permissions, though VEXEC
248 * is checked on the directory path leading up to the result. The caller
249 * must call naccess() to check the permissions of the returned leaf.
690a3127 250 */
28623bf9 251struct nchandle
21739618
MD
252nlookup_simple(const char *str, enum uio_seg seg,
253 int niflags, int *error)
690a3127
MD
254{
255 struct nlookupdata nd;
28623bf9 256 struct nchandle nch;
690a3127
MD
257
258 *error = nlookup_init(&nd, str, seg, niflags);
259 if (*error == 0) {
21739618 260 if ((*error = nlookup(&nd)) == 0) {
28623bf9
MD
261 nch = nd.nl_nch; /* keep hold ref from structure */
262 cache_zero(&nd.nl_nch); /* and NULL out */
21739618 263 } else {
28623bf9 264 cache_zero(&nch);
21739618 265 }
690a3127
MD
266 nlookup_done(&nd);
267 } else {
28623bf9 268 cache_zero(&nch);
690a3127 269 }
28623bf9 270 return(nch);
690a3127
MD
271}
272
273/*
274 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d
275 * on return, even if an error occurs. If no error occurs the returned
28623bf9 276 * nl_nch is always referenced and locked, otherwise it may or may not be.
21739618
MD
277 *
278 * Intermediate directory elements, including the current directory, require
279 * execute (search) permission. nlookup does not examine the access
280 * permissions on the returned element.
fad57d0e 281 *
bf022891
MD
282 * If NLC_CREATE is set the last directory must allow node creation,
283 * and an error code of 0 will be returned for a non-existant
284 * target (not ENOENT).
285 *
286 * If NLC_RENAME is set the last directory mut allow node deletion,
287 * plus the sticky check is made, and an error code of 0 will be returned
288 * for a non-existant target (not ENOENT). NLC_RENAME is set used for
289 * the rename target.
290 *
291 * If NLC_DELETE is set the last directory mut allow node deletion,
292 * plus the sticky check is made.
5312fa43
MD
293 *
294 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
295 * of the returned entry. The vnode will be referenced, but not locked,
296 * and will be released by nlookup_done() along with everything else.
690a3127
MD
297 */
298int
299nlookup(struct nlookupdata *nd)
300{
301 struct nlcomponent nlc;
28623bf9
MD
302 struct nchandle nch;
303 struct mount *mp;
f4d4e93a 304 int wasdotordotdot;
690a3127 305 char *ptr;
fad57d0e 306 char *xptr;
690a3127
MD
307 int error;
308 int len;
309
310#ifdef KTRACE
311 if (KTRPOINT(nd->nl_td, KTR_NAMEI))
9fb04d14 312 ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
690a3127
MD
313#endif
314 bzero(&nlc, sizeof(nlc));
315
316 /*
317 * Setup for the loop. The current working namecache element must
524c845c
MD
318 * be in a refd + unlocked state. This typically the case on entry except
319 * when stringing nlookup()'s along in a chain, since nlookup() always
28623bf9 320 * returns nl_nch in a locked state.
690a3127
MD
321 */
322 nd->nl_loopcnt = 0;
323 if (nd->nl_flags & NLC_NCPISLOCKED) {
324 nd->nl_flags &= ~NLC_NCPISLOCKED;
28623bf9 325 cache_unlock(&nd->nl_nch);
690a3127 326 }
5312fa43
MD
327 if (nd->nl_dvp ) {
328 vrele(nd->nl_dvp);
329 nd->nl_dvp = NULL;
330 }
690a3127
MD
331 ptr = nd->nl_path;
332
333 /*
28623bf9 334 * Loop on the path components. At the top of the loop nd->nl_nch
524c845c 335 * is ref'd and unlocked and represents our current position.
690a3127
MD
336 */
337 for (;;) {
338 /*
339 * Check if the root directory should replace the current
340 * directory. This is done at the start of a translation
341 * or after a symbolic link has been found. In other cases
342 * ptr will never be pointing at a '/'.
343 */
344 if (*ptr == '/') {
345 do {
346 ++ptr;
347 } while (*ptr == '/');
28623bf9
MD
348 cache_copy(&nd->nl_rootnch, &nch);
349 cache_drop(&nd->nl_nch);
350 nd->nl_nch = nch;
5312fa43
MD
351
352 /*
353 * Fast-track termination. There is no parent directory of
354 * the root in the same mount from the point of view of
355 * the caller so return EPERM if NLC_REFDVP is specified.
356 * e.g. 'rmdir /' is not allowed.
357 */
21739618 358 if (*ptr == 0) {
5312fa43
MD
359 if (nd->nl_flags & NLC_REFDVP) {
360 error = EPERM;
361 } else {
362 cache_lock(&nd->nl_nch);
363 nd->nl_flags |= NLC_NCPISLOCKED;
364 error = 0;
365 }
21739618
MD
366 break;
367 }
690a3127
MD
368 continue;
369 }
370
371 /*
28623bf9 372 * Check directory search permissions.
21739618 373 */
5dedb299 374 if ((error = naccess(&nd->nl_nch, VEXEC, nd->nl_cred, NULL)) != 0)
21739618
MD
375 break;
376
377 /*
fa8302b1
SS
378 * Extract the path component. Path components are limited to
379 * 255 characters.
690a3127
MD
380 */
381 nlc.nlc_nameptr = ptr;
382 while (*ptr && *ptr != '/')
383 ++ptr;
384 nlc.nlc_namelen = ptr - nlc.nlc_nameptr;
fa8302b1
SS
385 if (nlc.nlc_namelen >= 256) {
386 error = ENAMETOOLONG;
387 break;
388 }
690a3127
MD
389
390 /*
21739618
MD
391 * Lookup the path component in the cache, creating an unresolved
392 * entry if necessary. We have to handle "." and ".." as special
393 * cases.
394 *
395 * When handling ".." we have to detect a traversal back through a
28623bf9 396 * mount point. If we are at the root, ".." just returns the root.
524c845c 397 *
28623bf9 398 * This subsection returns a locked, refd 'nch' unless it errors out.
fad57d0e
MD
399 * The namecache topology is not allowed to be disconnected, so
400 * encountering a NULL parent will generate EINVAL. This typically
401 * occurs when a directory is removed out from under a process.
21739618
MD
402 */
403 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
28623bf9 404 cache_get(&nd->nl_nch, &nch);
f4d4e93a 405 wasdotordotdot = 1;
21739618
MD
406 } else if (nlc.nlc_namelen == 2 &&
407 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
28623bf9
MD
408 if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
409 nd->nl_nch.ncp == nd->nl_rootnch.ncp
410 ) {
411 /*
412 * ".." at the root returns the root
413 */
414 cache_get(&nd->nl_nch, &nch);
21739618 415 } else {
28623bf9
MD
416 /*
417 * Locate the parent ncp. If we are at the root of a
418 * filesystem mount we have to skip to the mounted-on
419 * point in the underlying filesystem.
420 */
421 nch = nd->nl_nch;
422 while (nch.ncp == nch.mount->mnt_ncmountpt.ncp)
e5cde29c 423 nch = nch.mount->mnt_ncmounton;
28623bf9
MD
424 nch.ncp = nch.ncp->nc_parent;
425 KKASSERT(nch.ncp != NULL);
426 cache_get(&nch, &nch);
21739618 427 }
fa8302b1 428 wasdotordotdot = 2;
21739618 429 } else {
28623bf9
MD
430 nch = cache_nlookup(&nd->nl_nch, &nlc);
431 while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) {
6ea70f76 432 kprintf("[diagnostic] nlookup: relookup %*.*s\n",
28623bf9
MD
433 nch.ncp->nc_nlen, nch.ncp->nc_nlen, nch.ncp->nc_name);
434 cache_put(&nch);
435 nch = cache_nlookup(&nd->nl_nch, &nlc);
8e005a45 436 }
f4d4e93a 437 wasdotordotdot = 0;
21739618 438 }
524c845c 439 /*
28623bf9 440 * [end of subsection] ncp is locked and ref'd. nd->nl_nch is ref'd
524c845c 441 */
21739618
MD
442
443 /*
444 * Resolve the namespace if necessary. The ncp returned by
445 * cache_nlookup() is referenced and locked.
8e005a45
MD
446 *
447 * XXX neither '.' nor '..' should return EAGAIN since they were
448 * previously resolved and thus cannot be newly created ncp's.
690a3127 449 */
28623bf9
MD
450 if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
451 error = cache_resolve(&nch, nd->nl_cred);
8e005a45 452 KKASSERT(error != EAGAIN);
690a3127 453 } else {
28623bf9 454 error = nch.ncp->nc_error;
690a3127 455 }
21739618
MD
456
457 /*
fad57d0e 458 * Early completion. ENOENT is not an error if this is the last
bf022891
MD
459 * component and NLC_CREATE or NLC_RENAME (rename target) was
460 * requested. Note that ncp->nc_error is left as ENOENT in that
461 * case, which we check later on.
462 *
463 * NOTE: For NLC_RENAME in the ENOENT case we do a VCREATE test,
464 * same as for NLC_CREATE.
f4d4e93a
MD
465 *
466 * Also handle invalid '.' or '..' components terminating a path
bf022891
MD
467 * for a create/rename/delete. The standard requires this and pax
468 * pretty stupidly depends on it.
fad57d0e
MD
469 */
470 for (xptr = ptr; *xptr == '/'; ++xptr)
471 ;
f4d4e93a 472 if (*xptr == 0) {
bf022891 473 if (error == ENOENT && (nd->nl_flags & (NLC_CREATE | NLC_RENAME))) {
c9aef211
MD
474 if (nd->nl_flags & NLC_NFS_RDONLY)
475 error = EROFS;
476 else
5dedb299 477 error = naccess(&nch, VCREATE, nd->nl_cred, NULL);
c9aef211 478 }
bf022891
MD
479 if (error == 0 && wasdotordotdot &&
480 (nd->nl_flags & (NLC_CREATE | NLC_RENAME | NLC_DELETE))) {
fa8302b1
SS
481 /*
482 * POSIX junk
483 */
484 if (nd->nl_flags & NLC_CREATE)
485 error = EEXIST;
486 else if (nd->nl_flags & NLC_DELETE)
487 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
488 else
489 error = EINVAL;
bf022891 490 }
fad57d0e
MD
491 }
492
493 /*
494 * Early completion on error.
21739618 495 */
690a3127 496 if (error) {
28623bf9 497 cache_put(&nch);
690a3127
MD
498 break;
499 }
500
501 /*
502 * If the element is a symlink and it is either not the last
503 * element or it is the last element and we are allowed to
504 * follow symlinks, resolve the symlink.
505 */
28623bf9 506 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
690a3127
MD
507 (*ptr || (nd->nl_flags & NLC_FOLLOW))
508 ) {
509 if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
21739618 510 error = ELOOP;
28623bf9 511 cache_put(&nch);
690a3127
MD
512 break;
513 }
28623bf9
MD
514 error = nreadsymlink(nd, &nch, &nlc);
515 cache_put(&nch);
21739618
MD
516 if (error)
517 break;
690a3127
MD
518
519 /*
520 * Concatenate trailing path elements onto the returned symlink.
521 * Note that if the path component (ptr) is not exhausted, it
522 * will being with a '/', so we do not have to add another one.
523 *
524 * The symlink may not be empty.
525 */
526 len = strlen(ptr);
527 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
528 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
70aac194 529 objcache_put(namei_oc, nlc.nlc_nameptr);
690a3127
MD
530 break;
531 }
532 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
533 if (nd->nl_flags & NLC_HASBUF)
70aac194 534 objcache_put(namei_oc, nd->nl_path);
690a3127
MD
535 nd->nl_path = nlc.nlc_nameptr;
536 nd->nl_flags |= NLC_HASBUF;
537 ptr = nd->nl_path;
538
539 /*
540 * Go back up to the top to resolve any initial '/'s in the
541 * symlink.
542 */
543 continue;
544 }
545
546 /*
21739618 547 * If the element is a directory and we are crossing a mount point,
28623bf9 548 * Locate the mount.
21739618 549 */
28623bf9
MD
550 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
551 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
552 (mp = cache_findmount(&nch)) != NULL
21739618 553 ) {
21739618
MD
554 struct vnode *tdp;
555
28623bf9
MD
556 cache_put(&nch);
557 cache_get(&mp->mnt_ncmountpt, &nch);
21739618 558
28623bf9 559 if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
f9642f56 560 while (vfs_busy(mp, 0))
21739618
MD
561 ;
562 error = VFS_ROOT(mp, &tdp);
f9642f56 563 vfs_unbusy(mp);
524c845c 564 if (error)
21739618 565 break;
28623bf9 566 cache_setvp(&nch, tdp);
21739618
MD
567 vput(tdp);
568 }
569 }
570 if (error) {
28623bf9 571 cache_put(&nch);
21739618
MD
572 break;
573 }
574
575 /*
690a3127
MD
576 * Skip any slashes to get to the next element. If there
577 * are any slashes at all the current element must be a
fad57d0e
MD
578 * directory or, in the create case, intended to become a directory.
579 * If it isn't we break without incrementing ptr and fall through
580 * to the failure case below.
690a3127
MD
581 */
582 while (*ptr == '/') {
28623bf9 583 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
fad57d0e
MD
584 !(nd->nl_flags & NLC_WILLBEDIR)
585 ) {
690a3127 586 break;
fad57d0e 587 }
690a3127
MD
588 ++ptr;
589 }
590
591 /*
592 * Continuation case: additional elements and the current
593 * element is a directory.
594 */
28623bf9
MD
595 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
596 cache_drop(&nd->nl_nch);
597 cache_unlock(&nch);
598 nd->nl_nch = nch;
690a3127
MD
599 continue;
600 }
601
602 /*
603 * Failure case: additional elements and the current element
604 * is not a directory
605 */
606 if (*ptr) {
28623bf9 607 cache_put(&nch);
690a3127
MD
608 error = ENOTDIR;
609 break;
610 }
611
612 /*
fad57d0e
MD
613 * Successful lookup of last element.
614 *
bf022891
MD
615 * Check directory permissions if a deletion or rename (target)
616 * is specified. This also handles the sticky test.
617 *
618 * We already checked permissions for creates in the early
619 * termination code above.
fad57d0e 620 */
bf022891
MD
621 if (*ptr == 0 && (nd->nl_flags & (NLC_DELETE | NLC_RENAME))) {
622 if (nd->nl_flags & NLC_DELETE)
623 error = naccess(&nch, VDELETE, nd->nl_cred, NULL);
624 else
625 error = naccess(&nch, VRENAME, nd->nl_cred, NULL);
626 if (error) {
28623bf9 627 cache_put(&nch);
fad57d0e
MD
628 break;
629 }
630 }
631
632 /*
fad57d0e
MD
633 * Termination: no more elements. If NLC_CREATE was set the
634 * ncp may represent a negative hit (ncp->nc_error will be ENOENT),
635 * but we still return an error code of 0.
5312fa43
MD
636 *
637 * If NLC_REFDVP is set acquire a referenced parent dvp.
690a3127 638 */
5312fa43
MD
639 if (nd->nl_flags & NLC_REFDVP) {
640 error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
641 if (error) {
642 kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
643 cache_put(&nch);
644 break;
645 }
646 }
28623bf9
MD
647 cache_drop(&nd->nl_nch);
648 nd->nl_nch = nch;
690a3127
MD
649 nd->nl_flags |= NLC_NCPISLOCKED;
650 error = 0;
651 break;
652 }
653 return(error);
654}
655
656/*
21739618
MD
657 * Resolve a mount point's glue ncp. This ncp connects creates the illusion
658 * of continuity in the namecache tree by connecting the ncp related to the
659 * vnode under the mount to the ncp related to the mount's root vnode.
660 *
661 * If no error occured a locked, ref'd ncp is stored in *ncpp.
662 */
663int
28623bf9 664nlookup_mp(struct mount *mp, struct nchandle *nch)
21739618 665{
21739618
MD
666 struct vnode *vp;
667 int error;
668
669 error = 0;
28623bf9
MD
670 cache_get(&mp->mnt_ncmountpt, nch);
671 if (nch->ncp->nc_flag & NCF_UNRESOLVED) {
f9642f56 672 while (vfs_busy(mp, 0))
21739618
MD
673 ;
674 error = VFS_ROOT(mp, &vp);
f9642f56 675 vfs_unbusy(mp);
21739618 676 if (error) {
28623bf9 677 cache_put(nch);
21739618 678 } else {
28623bf9 679 cache_setvp(nch, vp);
21739618
MD
680 vput(vp);
681 }
682 }
21739618
MD
683 return(error);
684}
685
686/*
690a3127 687 * Read the contents of a symlink, allocate a path buffer out of the
70aac194 688 * namei_oc and initialize the supplied nlcomponent with the result.
690a3127
MD
689 *
690 * If an error occurs no buffer will be allocated or returned in the nlc.
691 */
692int
28623bf9 693nreadsymlink(struct nlookupdata *nd, struct nchandle *nch,
690a3127
MD
694 struct nlcomponent *nlc)
695{
21739618 696 struct vnode *vp;
690a3127
MD
697 struct iovec aiov;
698 struct uio auio;
699 int linklen;
700 int error;
701 char *cp;
702
703 nlc->nlc_nameptr = NULL;
704 nlc->nlc_namelen = 0;
28623bf9 705 if (nch->ncp->nc_vp == NULL)
690a3127 706 return(ENOENT);
28623bf9 707 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0)
690a3127 708 return(error);
70aac194 709 cp = objcache_get(namei_oc, M_WAITOK);
690a3127
MD
710 aiov.iov_base = cp;
711 aiov.iov_len = MAXPATHLEN;
712 auio.uio_iov = &aiov;
713 auio.uio_iovcnt = 1;
714 auio.uio_offset = 0;
715 auio.uio_rw = UIO_READ;
716 auio.uio_segflg = UIO_SYSSPACE;
717 auio.uio_td = nd->nl_td;
718 auio.uio_resid = MAXPATHLEN - 1;
21739618 719 error = VOP_READLINK(vp, &auio, nd->nl_cred);
690a3127
MD
720 if (error)
721 goto fail;
21739618 722 linklen = MAXPATHLEN - 1 - auio.uio_resid;
690a3127
MD
723 if (varsym_enable) {
724 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1);
725 if (linklen < 0) {
726 error = ENAMETOOLONG;
727 goto fail;
728 }
729 }
730 cp[linklen] = 0;
731 nlc->nlc_nameptr = cp;
732 nlc->nlc_namelen = linklen;
21739618 733 vput(vp);
690a3127
MD
734 return(0);
735fail:
70aac194 736 objcache_put(namei_oc, cp);
21739618 737 vput(vp);
690a3127
MD
738 return(error);
739}
740
21739618
MD
741/*
742 * Check access [XXX cache vattr!] [XXX quota]
743 *
744 * Generally check the V* access bits from sys/vnode.h. All specified bits
745 * must pass for this function to return 0.
746 *
bf022891
MD
747 * The file does not have to exist when checking VCREATE or VRENAME access.
748 *
749 * The file must not exist if VEXCL is specified.
21739618 750 *
bf022891
MD
751 * Directory permissions in general are tested for VCREATE if the file
752 * does not exist, VDELETE if the file does exist, and VRENAME whether
753 * the file exists or not.
21739618 754 *
bf022891
MD
755 * The directory sticky bit is tested for VDELETE and VRENAME. NOTE: For
756 * VRENAME we only care if the target exists.
5dedb299 757 *
21739618 758 * The passed ncp may or may not be locked. The caller should use a
bf022891
MD
759 * locked ncp on leaf lookups, especially for VCREATE, VRENAME, VDELETE,
760 * and VEXCL checks.
21739618
MD
761 */
762int
5dedb299 763naccess(struct nchandle *nch, int vmode, struct ucred *cred, int *stickyp)
21739618 764{
28623bf9 765 struct nchandle par;
21739618
MD
766 struct vnode *vp;
767 struct vattr va;
768 int error;
5dedb299 769 int sticky;
21739618 770
28623bf9
MD
771 if (nch->ncp->nc_flag & NCF_UNRESOLVED) {
772 cache_lock(nch);
773 cache_resolve(nch, cred);
774 cache_unlock(nch);
21739618 775 }
28623bf9 776 error = nch->ncp->nc_error;
5dedb299
MD
777 sticky = 0;
778
bf022891
MD
779 /*
780 * Directory permissions and VEXCL checks. Do a precursor conditional
781 * to reduce overhead since most access checks are for read-only.
782 */
783 if (vmode & (VDELETE|VRENAME|VCREATE|VEXCL)) {
28623bf9 784 if (((vmode & VCREATE) && nch->ncp->nc_vp == NULL) ||
bf022891
MD
785 ((vmode & VDELETE) && nch->ncp->nc_vp != NULL) ||
786 (vmode & VRENAME)
21739618 787 ) {
28623bf9 788 if ((par.ncp = nch->ncp->nc_parent) == NULL) {
8e005a45 789 if (error != EAGAIN)
fad57d0e 790 error = EINVAL;
8e005a45 791 } else {
28623bf9
MD
792 par.mount = nch->mount;
793 cache_hold(&par);
5dedb299 794 error = naccess(&par, VWRITE, cred, &sticky);
bf022891 795 if ((vmode & (VDELETE | VRENAME)) && sticky)
5dedb299 796 vmode |= VSVTX;
28623bf9 797 cache_drop(&par);
8e005a45 798 }
21739618 799 }
28623bf9 800 if ((vmode & VEXCL) && nch->ncp->nc_vp != NULL)
21739618
MD
801 error = EEXIST;
802 }
803 if (error == 0) {
28623bf9 804 error = cache_vget(nch, cred, LK_SHARED, &vp);
21739618 805 if (error == ENOENT) {
bf022891 806 if (vmode & (VCREATE | VRENAME))
21739618
MD
807 error = 0;
808 } else if (error == 0) {
809 /* XXX cache the va in the namecache or in the vnode */
87de5057 810 if ((error = VOP_GETATTR(vp, &va)) == 0) {
21739618
MD
811 if ((vmode & VWRITE) && vp->v_mount) {
812 if (vp->v_mount->mnt_flag & MNT_RDONLY)
813 error = EROFS;
814 }
815 }
816 vput(vp);
5dedb299
MD
817
818 if (error == 0) {
819 /*
820 * Set the returned (*stickyp) if VSVTX is set and the uid
821 * is not the owner of the directory. The caller uses this
822 * disallow deletions of files not owned by the user if the
823 * user also does not own the directory and the sticky bit
824 * is set on the directory. Weird, I know.
825 */
826 if (stickyp && va.va_uid != cred->cr_uid)
827 *stickyp = (va.va_mode & VSVTX);
828
829 /*
830 * Process general access.
831 */
21739618 832 error = naccess_va(&va, vmode, cred);
5dedb299 833 }
21739618
MD
834 }
835 }
836 return(error);
837}
838
839/*
840 * Check the requested access against the given vattr using cred.
841 */
5dedb299 842static
21739618
MD
843int
844naccess_va(struct vattr *va, int vmode, struct ucred *cred)
845{
846 int i;
847
848 /*
849 * Test the immutable bit for files, directories, and softlinks.
bf022891
MD
850 *
851 * NOTE: Only called for VRENAME if the target exists.
21739618 852 */
bf022891 853 if (vmode & (VWRITE|VDELETE|VRENAME)) {
21739618
MD
854 if (va->va_type == VDIR || va->va_type == VLNK || va->va_type == VREG) {
855 if (va->va_flags & IMMUTABLE)
856 return (EPERM);
857 }
858 }
859
860 /*
861 * root gets universal access
862 */
863 if (cred->cr_uid == 0)
864 return(0);
865
866 /*
5dedb299
MD
867 * Check owner perms.
868 *
869 * If VOWN is set the owner of the file is allowed no matter when
870 * the owner-mode bits say (utimes).
21739618 871 */
21739618 872 if (cred->cr_uid == va->va_uid) {
e1b4bb12
MD
873 if ((vmode & VOWN) == 0) {
874 vmode &= S_IRWXU;
875 if ((vmode & va->va_mode) != vmode)
876 return(EACCES);
877 }
21739618
MD
878 return(0);
879 }
5dedb299
MD
880
881 /*
bf022891
MD
882 * If VSVTX is set only the owner may create or delete the file.
883 * This bit is typically set for VDELETE checks from unlink or
884 * the source file in a rename, and for VCREATE checks from the
885 * target file in a rename.
5dedb299 886 *
bf022891
MD
887 * Note that other V bits are not typically set in the VSVTX case.
888 * For creations and deletions we usually just care about directory
889 * permissions, not file permissions. So if this test passes the
890 * return value winds up being 0.
5dedb299
MD
891 */
892 if (vmode & VSVTX)
893 return(EACCES);
894
895 /*
896 * Check group perms
897 */
e1b4bb12 898 vmode &= S_IRWXU;
21739618
MD
899 vmode >>= 3;
900 for (i = 0; i < cred->cr_ngroups; ++i) {
901 if (va->va_gid == cred->cr_groups[i]) {
902 if ((vmode & va->va_mode) != vmode)
903 return(EACCES);
904 return(0);
905 }
906 }
907
5dedb299
MD
908 /*
909 * Check world perms
910 */
21739618
MD
911 vmode >>= 3;
912 if ((vmode & va->va_mode) != vmode)
913 return(EACCES);
914 return(0);
915}
916