ext2fs - Clear CNP_PDIRUNLOCK flag after a lookup.
[dragonfly.git] / sys / gnu / vfs / ext2fs / ext2_lookup.c
CommitLineData
984263bc
MD
1/*
2 * modified for Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 *
7 * $FreeBSD: src/sys/gnu/ext2fs/ext2_lookup.c,v 1.21.2.3 2002/11/17 02:02:42 bde Exp $
8 */
9/*
10 * Copyright (c) 1989, 1993
11 * The Regents of the University of California. All rights reserved.
12 * (c) UNIX System Laboratories, Inc.
13 * All or some portions of this file are derived from material licensed
14 * to the University of California by American Telephone and Telegraph
15 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
16 * the permission of UNIX System Laboratories, Inc.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 *
46 * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94
47 */
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/namei.h>
52#include <sys/buf.h>
53#include <sys/mount.h>
54#include <sys/vnode.h>
55#include <sys/malloc.h>
56#include <sys/dirent.h>
57
1f1db49f
MD
58#include "quota.h"
59#include "inode.h"
60#include "dir.h"
61#include "ext2mount.h"
1f2de5d4
MD
62#include "ext2_extern.h"
63#include "ext2_fs.h"
64#include "ext2_fs_sb.h"
984263bc 65
b993bb87 66/*
984263bc
MD
67 DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
68 while it is the native blocksize in ext2fs - thus, a #define
69 is no longer appropriate
70*/
71#undef DIRBLKSIZ
72
73extern int dirchk;
74
75static u_char ext2_ft_to_dt[] = {
76 DT_UNKNOWN, /* EXT2_FT_UNKNOWN */
77 DT_REG, /* EXT2_FT_REG_FILE */
78 DT_DIR, /* EXT2_FT_DIR */
79 DT_CHR, /* EXT2_FT_CHRDEV */
80 DT_BLK, /* EXT2_FT_BLKDEV */
81 DT_FIFO, /* EXT2_FT_FIFO */
82 DT_SOCK, /* EXT2_FT_SOCK */
83 DT_LNK, /* EXT2_FT_SYMLINK */
84};
85#define FTTODT(ft) \
c157ff7a 86 ((ft) > NELEM(ext2_ft_to_dt) ? \
984263bc
MD
87 DT_UNKNOWN : ext2_ft_to_dt[(ft)])
88
89static u_char dt_to_ext2_ft[] = {
90 EXT2_FT_UNKNOWN, /* DT_UNKNOWN */
91 EXT2_FT_FIFO, /* DT_FIFO */
92 EXT2_FT_CHRDEV, /* DT_CHR */
93 EXT2_FT_UNKNOWN, /* unused */
94 EXT2_FT_DIR, /* DT_DIR */
95 EXT2_FT_UNKNOWN, /* unused */
96 EXT2_FT_BLKDEV, /* DT_BLK */
97 EXT2_FT_UNKNOWN, /* unused */
98 EXT2_FT_REG_FILE, /* DT_REG */
99 EXT2_FT_UNKNOWN, /* unused */
100 EXT2_FT_SYMLINK, /* DT_LNK */
101 EXT2_FT_UNKNOWN, /* unused */
102 EXT2_FT_SOCK, /* DT_SOCK */
103 EXT2_FT_UNKNOWN, /* unused */
104 EXT2_FT_UNKNOWN, /* DT_WHT */
105};
106#define DTTOFT(dt) \
c157ff7a 107 ((dt) > NELEM(dt_to_ext2_ft) ? \
984263bc
MD
108 EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)])
109
/* Forward declaration: directory entry consistency check, defined below. */
static int	ext2_dirbadentry(struct vnode *dp, struct ext2_dir_entry_2 *de,
		    int entryoffsetinblock);
984263bc
MD
113
114/*
115 * Vnode op for reading directories.
116 *
117 * The routine below assumes that the on-disk format of a directory
118 * is the same as that defined by <sys/dirent.h>. If the on-disk
119 * format changes, then it will be necessary to do a conversion
120 * from the on-disk format that read returns to the format defined
121 * by <sys/dirent.h>.
122 */
123/*
124 * this is exactly what we do here - the problem is that the conversion
125 * will blow up some entries by four bytes, so it can't be done in place.
126 * This is too bad. Right now the conversion is done entry by entry, the
b993bb87 127 * converted entry is sent via uiomove.
984263bc
MD
128 *
129 * XXX allocate a buffer, convert as many entries as possible, then send
130 * the whole buffer to uiomove
0f7f7a49
CP
131 *
132 * ext2_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
984263bc
MD
133 */
134int
0f7f7a49 135ext2_readdir(struct vop_readdir_args *ap)
984263bc 136{
f7aae92f 137 struct uio *uio = ap->a_uio;
984263bc 138 int count, error;
984263bc
MD
139 struct ext2_dir_entry_2 *edp, *dp;
140 int ncookies;
984263bc
MD
141 struct uio auio;
142 struct iovec aiov;
143 caddr_t dirbuf;
144 int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize;
a1435edc 145 int readcnt, retval;
984263bc
MD
146 off_t startoffset = uio->uio_offset;
147
885ecb13
MD
148 if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
149 return(error);
150
984263bc
MD
151 count = uio->uio_resid;
152 /*
153 * Avoid complications for partial directory entries by adjusting
154 * the i/o to end at a block boundary. Don't give up (like ufs
155 * does) if the initial adjustment gives a negative count, since
156 * many callers don't supply a large enough buffer. The correct
157 * size is a little larger than DIRBLKSIZ to allow for expansion
158 * of directory entries, but some callers just use 512.
159 */
160 count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
161 if (count <= 0)
162 count += DIRBLKSIZ;
fb0466c9
MD
163 if (count > MAXBSIZE) /* limit to a reasonable size */
164 count = MAXBSIZE;
984263bc
MD
165
166#ifdef EXT2FS_DEBUG
b993bb87 167 kprintf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n",
984263bc
MD
168 uio->uio_offset, uio->uio_resid, count);
169#endif
170
171 auio = *uio;
172 auio.uio_iov = &aiov;
173 auio.uio_iovcnt = 1;
174 auio.uio_resid = count;
175 auio.uio_segflg = UIO_SYSSPACE;
176 aiov.iov_len = count;
884717e1 177 dirbuf = kmalloc(count, M_TEMP, M_WAITOK);
984263bc
MD
178 aiov.iov_base = dirbuf;
179 error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
180 if (error == 0) {
181 readcnt = count - auio.uio_resid;
182 edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt];
183 ncookies = 0;
b993bb87 184 for (dp = (struct ext2_dir_entry_2 *)dirbuf;
984263bc
MD
185 !error && uio->uio_resid > 0 && dp < edp; ) {
186 /*-
187 * "New" ext2fs directory entries differ in 3 ways
188 * from ufs on-disk ones:
189 * - the name is not necessarily NUL-terminated.
190 * - the file type field always exists and always
191 * follows the name length field.
192 * - the file type is encoded in a different way.
193 *
194 * "Old" ext2fs directory entries need no special
195 * conversions, since they binary compatible with
196 * "new" entries having a file type of 0 (i.e.,
197 * EXT2_FT_UNKNOWN). Splitting the old name length
198 * field didn't make a mess like it did in ufs,
199 * because ext2fs uses a machine-dependent disk
200 * layout.
201 */
a1435edc 202 if (dp->rec_len <= 0) {
984263bc
MD
203 error = EIO;
204 break;
205 }
a1435edc
JS
206 retval = vop_write_dirent(&error, uio, dp->inode,
207 FTTODT(dp->file_type), dp->name_len, dp->name);
208
209 if (retval)
210 break;
211 /* advance dp */
b993bb87 212 dp = (struct ext2_dir_entry_2 *)((char *)dp + dp->rec_len);
a1435edc
JS
213 if (!error)
214 ncookies++;
984263bc
MD
215 }
216 /* we need to correct uio_offset */
217 uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;
218
219 if (!error && ap->a_ncookies != NULL) {
84009d92 220 off_t *cookiep, *cookies, *ecookies;
984263bc
MD
221 off_t off;
222
223 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
224 panic("ext2fs_readdir: unexpected uio from NFS server");
fb0466c9 225 if (ncookies) {
884717e1
SW
226 cookies = kmalloc(ncookies * sizeof(off_t),
227 M_TEMP, M_WAITOK);
fb0466c9 228 } else {
884717e1
SW
229 cookies = kmalloc(sizeof(off_t), M_TEMP,
230 M_WAITOK);
fb0466c9 231 }
984263bc
MD
232 off = startoffset;
233 for (dp = (struct ext2_dir_entry_2 *)dirbuf,
234 cookiep = cookies, ecookies = cookies + ncookies;
235 cookiep < ecookies;
236 dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) {
237 off += dp->rec_len;
84009d92 238 *cookiep++ = off;
984263bc
MD
239 }
240 *ap->a_ncookies = ncookies;
241 *ap->a_cookies = cookies;
242 }
243 }
884717e1 244 kfree(dirbuf, M_TEMP);
984263bc
MD
245 if (ap->a_eofflag)
246 *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
885ecb13 247 vn_unlock(ap->a_vp);
984263bc
MD
248 return (error);
249}
250
251/*
252 * Convert a component of a pathname into a pointer to a locked inode.
253 * This is a very central and rather complicated routine.
254 * If the file system is not maintained in a strict tree hierarchy,
255 * this can result in a deadlock situation (see comments in code below).
256 *
257 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
258 * on whether the name is to be looked up, created, renamed, or deleted.
259 * When CREATE, RENAME, or DELETE is specified, information usable in
260 * creating, renaming, or deleting a directory entry may be calculated.
261 * If flag has LOCKPARENT or'ed into it and the target of the pathname
262 * exists, lookup returns both the target and its parent directory locked.
263 * When creating or renaming and LOCKPARENT is specified, the target may
264 * not be ".". When deleting and LOCKPARENT is specified, the target may
265 * be "."., but the caller must check to ensure it does an vrele and vput
266 * instead of two vputs.
267 *
268 * Overall outline of ufs_lookup:
269 *
270 * search for name in directory, to found or notfound
271 * notfound:
272 * if creating, return locked directory, leaving info on available slots
273 * else return error
274 * found:
275 * if at end of path and deleting, return information to allow delete
276 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target
277 * inode and return info to allow rewrite
278 * if not at end, add name to cache; if at end and neither creating
279 * nor deleting, add name to cache
0f7f7a49
CP
280 *
281 * ext2_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
282 * struct componentname *a_cnp)
984263bc
MD
283 */
284int
e62afb5f 285ext2_lookup(struct vop_old_lookup_args *ap)
984263bc 286{
f7aae92f
RG
287 struct vnode *vdp; /* vnode for directory being searched */
288 struct inode *dp; /* inode for directory being searched */
984263bc 289 struct buf *bp; /* a buffer of directory entries */
f7aae92f 290 struct ext2_dir_entry_2 *ep; /* the current directory entry */
984263bc
MD
291 int entryoffsetinblock; /* offset of ep in bp's buffer */
292 enum {NONE, COMPACT, FOUND} slotstatus;
293 doff_t slotoffset; /* offset of area with free space */
294 int slotsize; /* size of area at slotoffset */
295 int slotfreespace; /* amount of space free in slot */
296 int slotneeded; /* size of the entry we're seeking */
297 int numdirpasses; /* strategy for directory search */
298 doff_t endsearch; /* offset to end directory search */
299 doff_t prevoff; /* prev entry dp->i_offset */
300 struct vnode *pdp; /* saved dp during symlink work */
301 struct vnode *tdp; /* returned by VFS_VGET */
302 doff_t enduseful; /* pointer past last used dir slot */
303 u_long bmask; /* block offset mask */
304 int lockparent; /* 1 => lockparent flag is set */
305 int wantparent; /* 1 => wantparent or lockparent flag */
306 int namlen, error;
307 struct vnode **vpp = ap->a_vpp;
308 struct componentname *cnp = ap->a_cnp;
309 struct ucred *cred = cnp->cn_cred;
310 int flags = cnp->cn_flags;
311 int nameiop = cnp->cn_nameiop;
984263bc
MD
312
313 int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize;
314
315 bp = NULL;
316 slotoffset = -1;
317 *vpp = NULL;
318 vdp = ap->a_dvp;
319 dp = VTOI(vdp);
2b69e610
MD
320 lockparent = flags & CNP_LOCKPARENT;
321 wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT);
984263bc
MD
322
323 /*
324 * We now have a segment name to search for, and a directory to search.
325 */
326
327 /*
328 * Suppress search for slots unless creating
329 * file and at end of pathname, in which case
330 * we watch for a place to put the new file in
331 * case it doesn't already exist.
332 */
333 slotstatus = FOUND;
334 slotfreespace = slotsize = slotneeded = 0;
fad57d0e 335 if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) {
984263bc 336 slotstatus = NONE;
b993bb87 337 slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
984263bc
MD
338 /* was
339 slotneeded = (sizeof(struct direct) - MAXNAMLEN +
340 cnp->cn_namelen + 3) &~ 3; */
341 }
342
343 /*
344 * If there is cached information on a previous search of
345 * this directory, pick up where we last left off.
346 * We cache only lookups as these are the most common
347 * and have the greatest payoff. Caching CREATE has little
348 * benefit as it usually must search the entire directory
349 * to determine that the entry does not exist. Caching the
350 * location of the last DELETE or RENAME has not reduced
351 * profiling time and hence has been removed in the interest
352 * of simplicity.
353 */
1f1db49f 354 bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
2b69e610 355 if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 ||
984263bc
MD
356 dp->i_diroff > dp->i_size) {
357 entryoffsetinblock = 0;
358 dp->i_offset = 0;
359 numdirpasses = 1;
360 } else {
361 dp->i_offset = dp->i_diroff;
362 if ((entryoffsetinblock = dp->i_offset & bmask) &&
1f1db49f 363 (error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
984263bc
MD
364 return (error);
365 numdirpasses = 2;
984263bc
MD
366 }
367 prevoff = dp->i_offset;
368 endsearch = roundup(dp->i_size, DIRBLKSIZ);
369 enduseful = 0;
370
371searchloop:
372 while (dp->i_offset < endsearch) {
373 /*
374 * If necessary, get the next directory block.
375 */
376 if ((dp->i_offset & bmask) == 0) {
377 if (bp != NULL)
378 brelse(bp);
379 if ((error =
1f1db49f 380 EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
984263bc
MD
381 return (error);
382 entryoffsetinblock = 0;
383 }
384 /*
385 * If still looking for a slot, and at a DIRBLKSIZE
386 * boundary, have to start looking for free space again.
387 */
388 if (slotstatus == NONE &&
389 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
390 slotoffset = -1;
391 slotfreespace = 0;
392 }
393 /*
394 * Get pointer to next entry.
395 * Full validation checks are slow, so we only check
396 * enough to insure forward progress through the
397 * directory. Complete checks can be run by patching
398 * "dirchk" to be true.
399 */
400 ep = (struct ext2_dir_entry_2 *)
401 ((char *)bp->b_data + entryoffsetinblock);
402 if (ep->rec_len == 0 ||
403 (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
404 int i;
1f1db49f 405 ext2_dirbad(dp, dp->i_offset, "mangled entry");
984263bc
MD
406 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
407 dp->i_offset += i;
408 entryoffsetinblock += i;
409 continue;
410 }
411
412 /*
413 * If an appropriate sized slot has not yet been found,
414 * check to see if one is available. Also accumulate space
415 * in the current block so that we can determine if
416 * compaction is viable.
417 */
418 if (slotstatus != FOUND) {
419 int size = ep->rec_len;
420
421 if (ep->inode != 0)
422 size -= EXT2_DIR_REC_LEN(ep->name_len);
423 if (size > 0) {
424 if (size >= slotneeded) {
425 slotstatus = FOUND;
426 slotoffset = dp->i_offset;
427 slotsize = ep->rec_len;
428 } else if (slotstatus == NONE) {
429 slotfreespace += size;
430 if (slotoffset == -1)
431 slotoffset = dp->i_offset;
432 if (slotfreespace >= slotneeded) {
433 slotstatus = COMPACT;
434 slotsize = dp->i_offset +
435 ep->rec_len - slotoffset;
436 }
437 }
438 }
439 }
440
441 /*
442 * Check for a name match.
443 */
444 if (ep->inode) {
445 namlen = ep->name_len;
446 if (namlen == cnp->cn_namelen &&
447 !bcmp(cnp->cn_nameptr, ep->name,
448 (unsigned)namlen)) {
449 /*
450 * Save directory entry's inode number and
451 * reclen in ndp->ni_ufs area, and release
452 * directory buffer.
453 */
454 dp->i_ino = ep->inode;
455 dp->i_reclen = ep->rec_len;
456 goto found;
457 }
458 }
459 prevoff = dp->i_offset;
460 dp->i_offset += ep->rec_len;
461 entryoffsetinblock += ep->rec_len;
462 if (ep->inode)
463 enduseful = dp->i_offset;
464 }
465/* notfound: */
466 /*
467 * If we started in the middle of the directory and failed
468 * to find our target, we must check the beginning as well.
469 */
470 if (numdirpasses == 2) {
471 numdirpasses--;
472 dp->i_offset = 0;
473 endsearch = dp->i_diroff;
474 goto searchloop;
475 }
476 if (bp != NULL)
477 brelse(bp);
478 /*
479 * If creating, and at end of pathname and current
480 * directory has not been removed, then can consider
481 * allowing file to be created.
482 */
2b69e610 483 if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) &&
fad57d0e 484 dp->i_nlink != 0) {
984263bc
MD
485 /*
486 * Access for write is interpreted as allowing
487 * creation of files in the directory.
488 */
cb66845a 489 if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
984263bc
MD
490 return (error);
491 /*
492 * Return an indication of where the new directory
493 * entry should be put. If we didn't find a slot,
494 * then set dp->i_count to 0 indicating
495 * that the new slot belongs at the end of the
496 * directory. If we found a slot, then the new entry
497 * can be put in the range from dp->i_offset to
498 * dp->i_offset + dp->i_count.
499 */
500 if (slotstatus == NONE) {
501 dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
502 dp->i_count = 0;
503 enduseful = dp->i_offset;
504 } else {
505 dp->i_offset = slotoffset;
506 dp->i_count = slotsize;
507 if (enduseful < slotoffset + slotsize)
508 enduseful = slotoffset + slotsize;
509 }
510 dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
511 dp->i_flag |= IN_CHANGE | IN_UPDATE;
512 /*
513 * We return with the directory locked, so that
514 * the parameters we set up above will still be
515 * valid if we actually decide to do a direnter().
516 * We return ni_vp == NULL to indicate that the entry
517 * does not currently exist; we leave a pointer to
518 * the (locked) directory inode in ndp->ni_dvp.
519 * The pathname buffer is saved so that the name
520 * can be obtained later.
521 *
522 * NB - if the directory is unlocked, then this
523 * information cannot be used.
524 */
984263bc 525 if (!lockparent)
a11aaa81 526 vn_unlock(vdp);
984263bc
MD
527 return (EJUSTRETURN);
528 }
984263bc
MD
529 return (ENOENT);
530
531found:
984263bc
MD
532 /*
533 * Check that directory length properly reflects presence
534 * of this entry.
535 */
536 if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len)
537 > dp->i_size) {
1f1db49f 538 ext2_dirbad(dp, dp->i_offset, "i_size too small");
984263bc
MD
539 dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len);
540 dp->i_flag |= IN_CHANGE | IN_UPDATE;
541 }
542 brelse(bp);
543
544 /*
545 * Found component in pathname.
546 * If the final component of path name, save information
547 * in the cache as to where the entry was found.
548 */
fad57d0e 549 if (nameiop == NAMEI_LOOKUP)
984263bc
MD
550 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
551
552 /*
553 * If deleting, and at end of pathname, return
554 * parameters which can be used to remove file.
555 * If the wantparent flag isn't set, we return only
556 * the directory (in ndp->ni_dvp), otherwise we go
557 * on and lock the inode, being careful with ".".
558 */
fad57d0e 559 if (nameiop == NAMEI_DELETE) {
984263bc
MD
560 /*
561 * Write access to directory required to delete files.
562 */
cb66845a 563 if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
984263bc
MD
564 return (error);
565 /*
566 * Return pointer to current entry in dp->i_offset,
567 * and distance past previous entry (if there
568 * is a previous entry in this block) in dp->i_count.
569 * Save directory inode pointer in ndp->ni_dvp for dirremove().
570 */
571 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
572 dp->i_count = 0;
573 else
574 dp->i_count = dp->i_offset - prevoff;
575 if (dp->i_number == dp->i_ino) {
597aea93 576 vref(vdp);
984263bc
MD
577 *vpp = vdp;
578 return (0);
579 }
b9b0a6d0 580 if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
984263bc
MD
581 return (error);
582 /*
583 * If directory is "sticky", then user must own
584 * the directory, or the file in it, else she
585 * may not delete it (unless she's root). This
586 * implements append-only directories.
587 */
588 if ((dp->i_mode & ISVTX) &&
589 cred->cr_uid != 0 &&
590 cred->cr_uid != dp->i_uid &&
591 VTOI(tdp)->i_uid != cred->cr_uid) {
592 vput(tdp);
593 return (EPERM);
594 }
595 *vpp = tdp;
596 if (!lockparent)
a11aaa81 597 vn_unlock(vdp);
984263bc
MD
598 return (0);
599 }
600
601 /*
602 * If rewriting (RENAME), return the inode and the
603 * information required to rewrite the present directory
604 * Must get inode of directory entry to verify it's a
605 * regular file, or empty directory.
606 */
fad57d0e 607 if (nameiop == NAMEI_RENAME && wantparent) {
cb66845a 608 if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
984263bc
MD
609 return (error);
610 /*
611 * Careful about locking second inode.
612 * This can only occur if the target is ".".
613 */
614 if (dp->i_number == dp->i_ino)
615 return (EISDIR);
b9b0a6d0 616 if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
984263bc
MD
617 return (error);
618 *vpp = tdp;
984263bc 619 if (!lockparent)
a11aaa81 620 vn_unlock(vdp);
984263bc
MD
621 return (0);
622 }
623
624 /*
625 * Step through the translation in the name. We do not `vput' the
626 * directory because we may need it again if a symbolic link
627 * is relative to the current directory. Instead we save it
628 * unlocked as "pdp". We must get the target inode before unlocking
629 * the directory to insure that the inode will not be removed
630 * before we get it. We prevent deadlock by always fetching
631 * inodes from the root, moving down the directory tree. Thus
632 * when following backward pointers ".." we must unlock the
633 * parent directory before getting the requested directory.
634 * There is a potential race condition here if both the current
635 * and parent directories are removed before the VFS_VGET for the
636 * inode associated with ".." returns. We hope that this occurs
637 * infrequently since we cannot avoid this race condition without
638 * implementing a sophisticated deadlock detection algorithm.
639 * Note also that this simple deadlock detection scheme will not
640 * work if the file system has any hard links other than ".."
641 * that point backwards in the directory structure.
642 */
643 pdp = vdp;
2b69e610 644 if (flags & CNP_ISDOTDOT) {
a11aaa81 645 vn_unlock(pdp); /* race to get the inode */
b9b0a6d0 646 if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) {
ca466bae 647 vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
984263bc
MD
648 return (error);
649 }
ca466bae 650 if (lockparent && (error = vn_lock(pdp, LK_EXCLUSIVE))) {
984263bc
MD
651 vput(tdp);
652 return (error);
653 }
654 *vpp = tdp;
655 } else if (dp->i_number == dp->i_ino) {
597aea93 656 vref(vdp); /* we want ourself, ie "." */
984263bc
MD
657 *vpp = vdp;
658 } else {
b9b0a6d0 659 if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
984263bc 660 return (error);
5f4f1383 661 if (!lockparent) {
a11aaa81 662 vn_unlock(pdp);
5f4f1383
AHJ
663 cnp->cn_flags |= CNP_PDIRUNLOCK;
664 }
984263bc
MD
665 *vpp = tdp;
666 }
984263bc
MD
667 return (0);
668}
669
1f1db49f
MD
670void
671ext2_dirbad(struct inode *ip, doff_t offset, char *how)
672{
673 struct mount *mp;
674
675 mp = ITOV(ip)->v_mount;
086c1d7e 676 kprintf("%s: bad dir ino %lu at offset %ld: %s\n",
1f1db49f
MD
677 mp->mnt_stat.f_mntfromname, (u_long)ip->i_number,
678 (long)offset, how);
679 if ((mp->mnt_flag & MNT_RDONLY) == 0)
680 panic("ufs_dirbad: bad dir");
681}
682
984263bc
MD
683/*
684 * Do consistency checking on a directory entry:
685 * record length must be multiple of 4
686 * entry must fit in rest of its DIRBLKSIZ block
687 * record must be large enough to contain entry
688 * name is not longer than MAXNAMLEN
689 * name must be as long as advertised, and null terminated
690 */
691/*
692 * changed so that it confirms to ext2_check_dir_entry
693 */
694static int
0f7f7a49
CP
695ext2_dirbadentry(struct vnode *dp, struct ext2_dir_entry_2 *de,
696 int entryoffsetinblock)
984263bc
MD
697{
698 int DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize;
699
700 char * error_msg = NULL;
701
702 if (de->rec_len < EXT2_DIR_REC_LEN(1))
703 error_msg = "rec_len is smaller than minimal";
704 else if (de->rec_len % 4 != 0)
705 error_msg = "rec_len % 4 != 0";
706 else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len))
707 error_msg = "reclen is too small for name_len";
708 else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ)
709 error_msg = "directory entry across blocks";
710 /* else LATER
711 if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
712 error_msg = "inode out of bounds";
713 */
714
715 if (error_msg != NULL) {
086c1d7e
SW
716 kprintf("bad directory entry: %s\n", error_msg);
717 kprintf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n",
984263bc
MD
718 entryoffsetinblock, (unsigned long)de->inode,
719 de->rec_len, de->name_len);
720 }
721 return error_msg == NULL ? 0 : 1;
722}
723
724/*
725 * Write a directory entry after a call to namei, using the parameters
fad57d0e 726 * that it left in the directory inode. The argument ip is the inode which
b993bb87 727 * the new directory entry will refer to. Dvp is a pointer to the directory
fad57d0e 728 * to be written, which was left locked by namei. Remaining parameters
984263bc
MD
729 * (dp->i_offset, dp->i_count) indicate how the space for the new
730 * entry is to be obtained.
731 */
732int
0f7f7a49 733ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
984263bc 734{
f7aae92f
RG
735 struct ext2_dir_entry_2 *ep, *nep;
736 struct inode *dp;
984263bc
MD
737 struct buf *bp;
738 struct ext2_dir_entry_2 newdir;
739 struct iovec aiov;
740 struct uio auio;
741 u_int dsize;
742 int error, loc, newentrysize, spacefree;
743 char *dirbuf;
744 int DIRBLKSIZ = ip->i_e2fs->s_blocksize;
745
746
984263bc
MD
747 dp = VTOI(dvp);
748 newdir.inode = ip->i_number;
749 newdir.name_len = cnp->cn_namelen;
750 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
751 EXT2_FEATURE_INCOMPAT_FILETYPE))
752 newdir.file_type = DTTOFT(IFTODT(ip->i_mode));
753 else
754 newdir.file_type = EXT2_FT_UNKNOWN;
755 bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1);
756 newentrysize = EXT2_DIR_REC_LEN(newdir.name_len);
757 if (dp->i_count == 0) {
758 /*
759 * If dp->i_count is 0, then namei could find no
760 * space in the directory. Here, dp->i_offset will
761 * be on a directory block boundary and we will write the
762 * new entry into a fresh block.
763 */
764 if (dp->i_offset & (DIRBLKSIZ - 1))
765 panic("ext2_direnter: newblk");
766 auio.uio_offset = dp->i_offset;
767 newdir.rec_len = DIRBLKSIZ;
768 auio.uio_resid = newentrysize;
769 aiov.iov_len = newentrysize;
770 aiov.iov_base = (caddr_t)&newdir;
771 auio.uio_iov = &aiov;
772 auio.uio_iovcnt = 1;
773 auio.uio_rw = UIO_WRITE;
774 auio.uio_segflg = UIO_SYSSPACE;
7b95be2a 775 auio.uio_td = NULL;
984263bc
MD
776 error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
777 if (DIRBLKSIZ >
1f1db49f 778 VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
984263bc
MD
779 /* XXX should grow with balloc() */
780 panic("ext2_direnter: frag size");
781 else if (!error) {
782 dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
783 dp->i_flag |= IN_CHANGE;
784 }
785 return (error);
786 }
787
788 /*
789 * If dp->i_count is non-zero, then namei found space
790 * for the new entry in the range dp->i_offset to
791 * dp->i_offset + dp->i_count in the directory.
792 * To use this space, we may have to compact the entries located
793 * there, by copying them together towards the beginning of the
794 * block, leaving the free space in one usable chunk at the end.
795 */
796
797 /*
798 * Increase size of directory if entry eats into new space.
799 * This should never push the size past a new multiple of
800 * DIRBLKSIZE.
801 *
802 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
803 */
804 if (dp->i_offset + dp->i_count > dp->i_size)
805 dp->i_size = dp->i_offset + dp->i_count;
806 /*
807 * Get the block containing the space for the new directory entry.
808 */
1f1db49f 809 if ((error = EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0)
984263bc
MD
810 return (error);
811 /*
812 * Find space for the new entry. In the simple case, the entry at
813 * offset base will have the space. If it does not, then namei
814 * arranged that compacting the region dp->i_offset to
815 * dp->i_offset + dp->i_count would yield the
816 * space.
817 */
818 ep = (struct ext2_dir_entry_2 *)dirbuf;
819 dsize = EXT2_DIR_REC_LEN(ep->name_len);
820 spacefree = ep->rec_len - dsize;
821 for (loc = ep->rec_len; loc < dp->i_count; ) {
822 nep = (struct ext2_dir_entry_2 *)(dirbuf + loc);
823 if (ep->inode) {
824 /* trim the existing slot */
825 ep->rec_len = dsize;
826 ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
827 } else {
828 /* overwrite; nothing there; header is ours */
829 spacefree += dsize;
830 }
831 dsize = EXT2_DIR_REC_LEN(nep->name_len);
832 spacefree += nep->rec_len - dsize;
833 loc += nep->rec_len;
834 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
835 }
836 /*
837 * Update the pointer fields in the previous entry (if any),
838 * copy in the new entry, and write out the block.
839 */
840 if (ep->inode == 0) {
841 if (spacefree + dsize < newentrysize)
842 panic("ext2_direnter: compact1");
843 newdir.rec_len = spacefree + dsize;
844 } else {
845 if (spacefree < newentrysize)
846 panic("ext2_direnter: compact2");
847 newdir.rec_len = spacefree;
848 ep->rec_len = dsize;
849 ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
850 }
851 bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
62cfda27 852 error = bwrite(bp);
984263bc
MD
853 dp->i_flag |= IN_CHANGE | IN_UPDATE;
854 if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
1f1db49f 855 error = EXT2_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
87de5057 856 cnp->cn_cred);
984263bc
MD
857 return (error);
858}
859
860/*
861 * Remove a directory entry after a call to namei, using
fad57d0e 862 * the parameters which it left in the directory inode. The entry
984263bc
MD
863 * dp->i_offset contains the offset into the directory of the
864 * entry to be eliminated. The dp->i_count field contains the
865 * size of the previous record in the directory. If this
866 * is 0, the first entry is being deleted, so we need only
867 * zero the inode number to mark the entry as free. If the
868 * entry is not the first in the directory, we must reclaim
869 * the space of the now empty record by adding the record size
870 * to the size of the previous entry.
871 */
872int
0f7f7a49 873ext2_dirremove(struct vnode *dvp, struct componentname *cnp)
984263bc 874{
f7aae92f 875 struct inode *dp;
984263bc
MD
876 struct ext2_dir_entry_2 *ep;
877 struct buf *bp;
878 int error;
b993bb87 879
984263bc
MD
880 dp = VTOI(dvp);
881 if (dp->i_count == 0) {
882 /*
883 * First entry in block: set d_ino to zero.
884 */
885 if ((error =
1f1db49f 886 EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0)
984263bc
MD
887 return (error);
888 ep->inode = 0;
62cfda27 889 error = bwrite(bp);
984263bc
MD
890 dp->i_flag |= IN_CHANGE | IN_UPDATE;
891 return (error);
892 }
893 /*
894 * Collapse new free space into previous entry.
895 */
1f1db49f 896 if ((error = EXT2_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
984263bc
MD
897 (char **)&ep, &bp)) != 0)
898 return (error);
899 ep->rec_len += dp->i_reclen;
62cfda27 900 error = bwrite(bp);
984263bc
MD
901 dp->i_flag |= IN_CHANGE | IN_UPDATE;
902 return (error);
903}
904
905/*
906 * Rewrite an existing directory entry to point at the inode
907 * supplied. The parameters describing the directory entry are
908 * set up by a call to namei.
909 */
910int
0f7f7a49 911ext2_dirrewrite(struct inode *dp, struct inode *ip, struct componentname *cnp)
984263bc
MD
912{
913 struct buf *bp;
914 struct ext2_dir_entry_2 *ep;
915 struct vnode *vdp = ITOV(dp);
916 int error;
917
1f1db49f 918 if ((error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0)
984263bc
MD
919 return (error);
920 ep->inode = ip->i_number;
921 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
922 EXT2_FEATURE_INCOMPAT_FILETYPE))
923 ep->file_type = DTTOFT(IFTODT(ip->i_mode));
924 else
925 ep->file_type = EXT2_FT_UNKNOWN;
62cfda27 926 error = bwrite(bp);
984263bc
MD
927 dp->i_flag |= IN_CHANGE | IN_UPDATE;
928 return (error);
929}
930
931/*
932 * Check if a directory is empty or not.
933 * Inode supplied must be locked.
934 *
935 * Using a struct dirtemplate here is not precisely
936 * what we want, but better than using a struct direct.
937 *
938 * NB: does not handle corrupted directories.
939 */
940int
0f7f7a49 941ext2_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred)
984263bc 942{
f7aae92f 943 off_t off;
984263bc 944 struct dirtemplate dbuf;
f7aae92f 945 struct ext2_dir_entry_2 *dp = (struct ext2_dir_entry_2 *)&dbuf;
984263bc 946 int error, count, namlen;
b993bb87 947
984263bc
MD
948#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
949
950 for (off = 0; off < ip->i_size; off += dp->rec_len) {
951 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
87de5057 952 UIO_SYSSPACE, IO_NODELOCKED, cred, &count);
984263bc
MD
953 /*
954 * Since we read MINDIRSIZ, residual must
955 * be 0 unless we're at end of file.
956 */
957 if (error || count != 0)
958 return (0);
959 /* avoid infinite loops */
960 if (dp->rec_len == 0)
961 return (0);
962 /* skip empty entries */
963 if (dp->inode == 0)
964 continue;
965 /* accept only "." and ".." */
966 namlen = dp->name_len;
967 if (namlen > 2)
968 return (0);
969 if (dp->name[0] != '.')
970 return (0);
971 /*
972 * At this point namlen must be 1 or 2.
973 * 1 implies ".", 2 implies ".." if second
974 * char is also "."
975 */
976 if (namlen == 1)
977 continue;
978 if (dp->name[1] == '.' && dp->inode == parentino)
979 continue;
980 return (0);
981 }
982 return (1);
983}
984
985/*
986 * Check if source directory is in the path of the target directory.
987 * Target is supplied locked, source is unlocked.
988 * The target is always vput before returning.
989 */
990int
0f7f7a49 991ext2_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
984263bc
MD
992{
993 struct vnode *vp;
994 int error, rootino, namlen;
995 struct dirtemplate dirbuf;
996
997 vp = ITOV(target);
998 if (target->i_number == source->i_number) {
999 error = EEXIST;
1000 goto out;
1001 }
1002 rootino = ROOTINO;
1003 error = 0;
1004 if (target->i_number == rootino)
1005 goto out;
1006
1007 for (;;) {
1008 if (vp->v_type != VDIR) {
1009 error = ENOTDIR;
1010 break;
1011 }
1012 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
87de5057 1013 sizeof (struct dirtemplate), (off_t)0,
60233e58 1014 UIO_SYSSPACE, IO_NODELOCKED, cred, NULL);
984263bc
MD
1015 if (error != 0)
1016 break;
1017 namlen = dirbuf.dotdot_type; /* like ufs little-endian */
1018 if (namlen != 2 ||
1019 dirbuf.dotdot_name[0] != '.' ||
1020 dirbuf.dotdot_name[1] != '.') {
1021 error = ENOTDIR;
1022 break;
1023 }
1024 if (dirbuf.dotdot_ino == source->i_number) {
1025 error = EINVAL;
1026 break;
1027 }
1028 if (dirbuf.dotdot_ino == rootino)
1029 break;
1030 vput(vp);
b9b0a6d0 1031 if ((error = VFS_VGET(vp->v_mount, NULL, dirbuf.dotdot_ino, &vp)) != 0) {
984263bc
MD
1032 vp = NULL;
1033 break;
1034 }
1035 }
1036
1037out:
1038 if (error == ENOTDIR)
086c1d7e 1039 kprintf("checkpath: .. not a directory\n");
984263bc
MD
1040 if (vp != NULL)
1041 vput(vp);
1042 return (error);
1043}