gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* modified for Lites 1.1
	3	*
	4	* Aug 1995, Godmar Back (gback@cs.utah.edu)
	5	* University of Utah, Department of Computer Science
	6	*
	7	* $FreeBSD: src/sys/gnu/ext2fs/ext2_lookup.c,v 1.21.2.3 2002/11/17 02:02:42 bde Exp $
	8	* $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_lookup.c,v 1.21 2006/05/05 21:15:09 dillon Exp $
	9	*/
	10	/*
	11	* Copyright (c) 1989, 1993
	12	* The Regents of the University of California. All rights reserved.
	13	* (c) UNIX System Laboratories, Inc.
	14	* All or some portions of this file are derived from material licensed
	15	* to the University of California by American Telephone and Telegraph
	16	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	17	* the permission of UNIX System Laboratories, Inc.
	18	*
	19	* Redistribution and use in source and binary forms, with or without
	20	* modification, are permitted provided that the following conditions
	21	* are met:
	22	* 1. Redistributions of source code must retain the above copyright
	23	* notice, this list of conditions and the following disclaimer.
	24	* 2. Redistributions in binary form must reproduce the above copyright
	25	* notice, this list of conditions and the following disclaimer in the
	26	* documentation and/or other materials provided with the distribution.
	27	* 3. All advertising materials mentioning features or use of this software
	28	* must display the following acknowledgement:
	29	* This product includes software developed by the University of
	30	* California, Berkeley and its contributors.
	31	* 4. Neither the name of the University nor the names of its contributors
	32	* may be used to endorse or promote products derived from this software
	33	* without specific prior written permission.
	34	*
	35	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	36	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	37	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	38	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	39	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	40	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	41	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	42	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	43	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	44	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	45	* SUCH DAMAGE.
	46	*
	47	* @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94
	48	*/
	49
	50	#include <sys/param.h>
	51	#include <sys/systm.h>
	52	#include <sys/namei.h>
	53	#include <sys/buf.h>
	54	#include <sys/mount.h>
	55	#include <sys/vnode.h>
	56	#include <sys/malloc.h>
	57	#include <sys/dirent.h>
	58
	59	#include "quota.h"
	60	#include "inode.h"
	61	#include "dir.h"
	62	#include "ext2mount.h"
	63	#include "ext2_extern.h"
	64	#include "ext2_fs.h"
	65	#include "ext2_fs_sb.h"
	66
	67	/*
	68	DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
	69	while it is the native blocksize in ext2fs - thus, a #define
	70	is no longer appropriate
	71	*/
	72	#undef DIRBLKSIZ
	73
	74	extern int dirchk;
	75
	76	static u_char ext2_ft_to_dt[] = {
	77	DT_UNKNOWN, /* EXT2_FT_UNKNOWN */
	78	DT_REG, /* EXT2_FT_REG_FILE */
	79	DT_DIR, /* EXT2_FT_DIR */
	80	DT_CHR, /* EXT2_FT_CHRDEV */
	81	DT_BLK, /* EXT2_FT_BLKDEV */
	82	DT_FIFO, /* EXT2_FT_FIFO */
	83	DT_SOCK, /* EXT2_FT_SOCK */
	84	DT_LNK, /* EXT2_FT_SYMLINK */
	85	};
	86	#define FTTODT(ft) \
	87	((ft) > sizeof(ext2_ft_to_dt) / sizeof(ext2_ft_to_dt[0]) ? \
	88	DT_UNKNOWN : ext2_ft_to_dt[(ft)])
	89
	90	static u_char dt_to_ext2_ft[] = {
	91	EXT2_FT_UNKNOWN, /* DT_UNKNOWN */
	92	EXT2_FT_FIFO, /* DT_FIFO */
	93	EXT2_FT_CHRDEV, /* DT_CHR */
	94	EXT2_FT_UNKNOWN, /* unused */
	95	EXT2_FT_DIR, /* DT_DIR */
	96	EXT2_FT_UNKNOWN, /* unused */
	97	EXT2_FT_BLKDEV, /* DT_BLK */
	98	EXT2_FT_UNKNOWN, /* unused */
	99	EXT2_FT_REG_FILE, /* DT_REG */
	100	EXT2_FT_UNKNOWN, /* unused */
	101	EXT2_FT_SYMLINK, /* DT_LNK */
	102	EXT2_FT_UNKNOWN, /* unused */
	103	EXT2_FT_SOCK, /* DT_SOCK */
	104	EXT2_FT_UNKNOWN, /* unused */
	105	EXT2_FT_UNKNOWN, /* DT_WHT */
	106	};
	107	#define DTTOFT(dt) \
	108	((dt) > sizeof(dt_to_ext2_ft) / sizeof(dt_to_ext2_ft[0]) ? \
	109	EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)])
	110
	/* Forward declaration: directory-entry consistency check used by lookup. */
	static int	ext2_dirbadentry(struct vnode *dp,
			    struct ext2_dir_entry_2 *de, int entryoffsetinblock);
	114
	115	/*
	116	* Vnode op for reading directories.
	117	*
	118	* The routine below assumes that the on-disk format of a directory
	119	* is the same as that defined by <sys/dirent.h>. If the on-disk
	120	* format changes, then it will be necessary to do a conversion
	121	* from the on-disk format that read returns to the format defined
	122	* by <sys/dirent.h>.
	123	*/
	124	/*
	125	* this is exactly what we do here - the problem is that the conversion
	126	* will blow up some entries by four bytes, so it can't be done in place.
	127	* This is too bad. Right now the conversion is done entry by entry, the
	128	* converted entry is sent via uiomove.
	129	*
	130	* XXX allocate a buffer, convert as many entries as possible, then send
	131	* the whole buffer to uiomove
	132	*
	133	* ext2_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
	134	*/
	135	int
	136	ext2_readdir(struct vop_readdir_args *ap)
	137	{
	138	struct uio *uio = ap->a_uio;
	139	int count, error;
	140
	141	struct ext2_dir_entry_2 edp, dp;
	142	int ncookies;
	143	struct uio auio;
	144	struct iovec aiov;
	145	caddr_t dirbuf;
	146	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize;
	147	int readcnt, retval;
	148	off_t startoffset = uio->uio_offset;
	149
	150	count = uio->uio_resid;
	151	/*
	152	* Avoid complications for partial directory entries by adjusting
	153	* the i/o to end at a block boundary. Don't give up (like ufs
	154	* does) if the initial adjustment gives a negative count, since
	155	* many callers don't supply a large enough buffer. The correct
	156	* size is a little larger than DIRBLKSIZ to allow for expansion
	157	* of directory entries, but some callers just use 512.
	158	*/
	159	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
	160	if (count <= 0)
	161	count += DIRBLKSIZ;
	162
	163	#ifdef EXT2FS_DEBUG
	164	printf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n",
	165	uio->uio_offset, uio->uio_resid, count);
	166	#endif
	167
	168	auio = *uio;
	169	auio.uio_iov = &aiov;
	170	auio.uio_iovcnt = 1;
	171	auio.uio_resid = count;
	172	auio.uio_segflg = UIO_SYSSPACE;
	173	aiov.iov_len = count;
	174	MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
	175	aiov.iov_base = dirbuf;
	176	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
	177	if (error == 0) {
	178	readcnt = count - auio.uio_resid;
	179	edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt];
	180	ncookies = 0;
	181	for (dp = (struct ext2_dir_entry_2 *)dirbuf;
	182	!error && uio->uio_resid > 0 && dp < edp; ) {
	183	/*-
	184	* "New" ext2fs directory entries differ in 3 ways
	185	* from ufs on-disk ones:
	186	* - the name is not necessarily NUL-terminated.
	187	* - the file type field always exists and always
	188	* follows the name length field.
	189	* - the file type is encoded in a different way.
	190	*
	191	* "Old" ext2fs directory entries need no special
	192	* conversions, since they binary compatible with
	193	* "new" entries having a file type of 0 (i.e.,
	194	* EXT2_FT_UNKNOWN). Splitting the old name length
	195	* field didn't make a mess like it did in ufs,
	196	* because ext2fs uses a machine-dependent disk
	197	* layout.
	198	*/
	199	if (dp->rec_len <= 0) {
	200	error = EIO;
	201	break;
	202	}
	203	retval = vop_write_dirent(&error, uio, dp->inode,
	204	FTTODT(dp->file_type), dp->name_len, dp->name);
	205
	206	if (retval)
	207	break;
	208	/* advance dp */
	209	dp = (struct ext2_dir_entry_2 )((char )dp + dp->rec_len);
	210	if (!error)
	211	ncookies++;
	212	}
	213	/* we need to correct uio_offset */
	214	uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;
	215
	216	if (!error && ap->a_ncookies != NULL) {
	217	u_long cookiep, cookies, *ecookies;
	218	off_t off;
	219
	220	if (uio->uio_segflg != UIO_SYSSPACE \|\| uio->uio_iovcnt != 1)
	221	panic("ext2fs_readdir: unexpected uio from NFS server");
	222	MALLOC(cookies, u_long , ncookies sizeof(u_long), M_TEMP,
	223	M_WAITOK);
	224	off = startoffset;
	225	for (dp = (struct ext2_dir_entry_2 *)dirbuf,
	226	cookiep = cookies, ecookies = cookies + ncookies;
	227	cookiep < ecookies;
	228	dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) {
	229	off += dp->rec_len;
	230	*cookiep++ = (u_long) off;
	231	}
	232	*ap->a_ncookies = ncookies;
	233	*ap->a_cookies = cookies;
	234	}
	235	}
	236	FREE(dirbuf, M_TEMP);
	237	if (ap->a_eofflag)
	238	*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
	239	return (error);
	240	}
	241
	242	/*
	243	* Convert a component of a pathname into a pointer to a locked inode.
	244	* This is a very central and rather complicated routine.
	245	* If the file system is not maintained in a strict tree hierarchy,
	246	* this can result in a deadlock situation (see comments in code below).
	247	*
	248	* The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
	249	* on whether the name is to be looked up, created, renamed, or deleted.
	250	* When CREATE, RENAME, or DELETE is specified, information usable in
	251	* creating, renaming, or deleting a directory entry may be calculated.
	252	* If flag has LOCKPARENT or'ed into it and the target of the pathname
	253	* exists, lookup returns both the target and its parent directory locked.
	254	* When creating or renaming and LOCKPARENT is specified, the target may
	255	* not be ".". When deleting and LOCKPARENT is specified, the target may
	256	* be ".", but the caller must check to ensure it does a vrele and a vput
	257	* instead of two vputs.
	258	*
	259	* Overall outline of ufs_lookup:
	260	*
	261	* search for name in directory, to found or notfound
	262	* notfound:
	263	* if creating, return locked directory, leaving info on available slots
	264	* else return error
	265	* found:
	266	* if at end of path and deleting, return information to allow delete
	267	* if at end of path and rewriting (RENAME and LOCKPARENT), lock target
	268	* inode and return info to allow rewrite
	269	* if not at end, add name to cache; if at end and neither creating
	270	* nor deleting, add name to cache
	271	*
	272	* ext2_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
	273	* struct componentname *a_cnp)
	274	*/
	275	int
	276	ext2_lookup(struct vop_old_lookup_args *ap)
	277	{
	278	struct vnode vdp; / vnode for directory being searched */
	279	struct inode dp; / inode for directory being searched */
	280	struct buf bp; / a buffer of directory entries */
	281	struct ext2_dir_entry_2 ep; / the current directory entry */
	282	int entryoffsetinblock; /* offset of ep in bp's buffer */
	283	enum {NONE, COMPACT, FOUND} slotstatus;
	284	doff_t slotoffset; /* offset of area with free space */
	285	int slotsize; /* size of area at slotoffset */
	286	int slotfreespace; /* amount of space free in slot */
	287	int slotneeded; /* size of the entry we're seeking */
	288	int numdirpasses; /* strategy for directory search */
	289	doff_t endsearch; /* offset to end directory search */
	290	doff_t prevoff; /* prev entry dp->i_offset */
	291	struct vnode pdp; / saved dp during symlink work */
	292	struct vnode tdp; / returned by VFS_VGET */
	293	doff_t enduseful; /* pointer past last used dir slot */
	294	u_long bmask; /* block offset mask */
	295	int lockparent; /* 1 => lockparent flag is set */
	296	int wantparent; /* 1 => wantparent or lockparent flag */
	297	int namlen, error;
	298	struct vnode **vpp = ap->a_vpp;
	299	struct componentname *cnp = ap->a_cnp;
	300	struct ucred *cred = cnp->cn_cred;
	301	int flags = cnp->cn_flags;
	302	int nameiop = cnp->cn_nameiop;
	303	globaldata_t gd = mycpu;
	304
	305	int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize;
	306
	307	bp = NULL;
	308	slotoffset = -1;
	309	*vpp = NULL;
	310	vdp = ap->a_dvp;
	311	dp = VTOI(vdp);
	312	lockparent = flags & CNP_LOCKPARENT;
	313	wantparent = flags & (CNP_LOCKPARENT\|CNP_WANTPARENT);
	314
	315	/*
	316	* We now have a segment name to search for, and a directory to search.
	317	*/
	318
	319	/*
	320	* Suppress search for slots unless creating
	321	* file and at end of pathname, in which case
	322	* we watch for a place to put the new file in
	323	* case it doesn't already exist.
	324	*/
	325	slotstatus = FOUND;
	326	slotfreespace = slotsize = slotneeded = 0;
	327	if (nameiop == NAMEI_CREATE \|\| nameiop == NAMEI_RENAME) {
	328	slotstatus = NONE;
	329	slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
	330	/* was
	331	slotneeded = (sizeof(struct direct) - MAXNAMLEN +
	332	cnp->cn_namelen + 3) &~ 3; */
	333	}
	334
	335	/*
	336	* If there is cached information on a previous search of
	337	* this directory, pick up where we last left off.
	338	* We cache only lookups as these are the most common
	339	* and have the greatest payoff. Caching CREATE has little
	340	* benefit as it usually must search the entire directory
	341	* to determine that the entry does not exist. Caching the
	342	* location of the last DELETE or RENAME has not reduced
	343	* profiling time and hence has been removed in the interest
	344	* of simplicity.
	345	*/
	346	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
	347	if (nameiop != NAMEI_LOOKUP \|\| dp->i_diroff == 0 \|\|
	348	dp->i_diroff > dp->i_size) {
	349	entryoffsetinblock = 0;
	350	dp->i_offset = 0;
	351	numdirpasses = 1;
	352	} else {
	353	dp->i_offset = dp->i_diroff;
	354	if ((entryoffsetinblock = dp->i_offset & bmask) &&
	355	(error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
	356	return (error);
	357	numdirpasses = 2;
	358	gd->gd_nchstats->ncs_2passes++;
	359	}
	360	prevoff = dp->i_offset;
	361	endsearch = roundup(dp->i_size, DIRBLKSIZ);
	362	enduseful = 0;
	363
	364	searchloop:
	365	while (dp->i_offset < endsearch) {
	366	/*
	367	* If necessary, get the next directory block.
	368	*/
	369	if ((dp->i_offset & bmask) == 0) {
	370	if (bp != NULL)
	371	brelse(bp);
	372	if ((error =
	373	EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
	374	return (error);
	375	entryoffsetinblock = 0;
	376	}
	377	/*
	378	* If still looking for a slot, and at a DIRBLKSIZE
	379	* boundary, have to start looking for free space again.
	380	*/
	381	if (slotstatus == NONE &&
	382	(entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
	383	slotoffset = -1;
	384	slotfreespace = 0;
	385	}
	386	/*
	387	* Get pointer to next entry.
	388	* Full validation checks are slow, so we only check
	389	* enough to insure forward progress through the
	390	* directory. Complete checks can be run by patching
	391	* "dirchk" to be true.
	392	*/
	393	ep = (struct ext2_dir_entry_2 *)
	394	((char *)bp->b_data + entryoffsetinblock);
	395	if (ep->rec_len == 0 \|\|
	396	(dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
	397	int i;
	398	ext2_dirbad(dp, dp->i_offset, "mangled entry");
	399	i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
	400	dp->i_offset += i;
	401	entryoffsetinblock += i;
	402	continue;
	403	}
	404
	405	/*
	406	* If an appropriate sized slot has not yet been found,
	407	* check to see if one is available. Also accumulate space
	408	* in the current block so that we can determine if
	409	* compaction is viable.
	410	*/
	411	if (slotstatus != FOUND) {
	412	int size = ep->rec_len;
	413
	414	if (ep->inode != 0)
	415	size -= EXT2_DIR_REC_LEN(ep->name_len);
	416	if (size > 0) {
	417	if (size >= slotneeded) {
	418	slotstatus = FOUND;
	419	slotoffset = dp->i_offset;
	420	slotsize = ep->rec_len;
	421	} else if (slotstatus == NONE) {
	422	slotfreespace += size;
	423	if (slotoffset == -1)
	424	slotoffset = dp->i_offset;
	425	if (slotfreespace >= slotneeded) {
	426	slotstatus = COMPACT;
	427	slotsize = dp->i_offset +
	428	ep->rec_len - slotoffset;
	429	}
	430	}
	431	}
	432	}
	433
	434	/*
	435	* Check for a name match.
	436	*/
	437	if (ep->inode) {
	438	namlen = ep->name_len;
	439	if (namlen == cnp->cn_namelen &&
	440	!bcmp(cnp->cn_nameptr, ep->name,
	441	(unsigned)namlen)) {
	442	/*
	443	* Save directory entry's inode number and
	444	* reclen in ndp->ni_ufs area, and release
	445	* directory buffer.
	446	*/
	447	dp->i_ino = ep->inode;
	448	dp->i_reclen = ep->rec_len;
	449	goto found;
	450	}
	451	}
	452	prevoff = dp->i_offset;
	453	dp->i_offset += ep->rec_len;
	454	entryoffsetinblock += ep->rec_len;
	455	if (ep->inode)
	456	enduseful = dp->i_offset;
	457	}
	458	/* notfound: */
	459	/*
	460	* If we started in the middle of the directory and failed
	461	* to find our target, we must check the beginning as well.
	462	*/
	463	if (numdirpasses == 2) {
	464	numdirpasses--;
	465	dp->i_offset = 0;
	466	endsearch = dp->i_diroff;
	467	goto searchloop;
	468	}
	469	if (bp != NULL)
	470	brelse(bp);
	471	/*
	472	* If creating, and at end of pathname and current
	473	* directory has not been removed, then can consider
	474	* allowing file to be created.
	475	*/
	476	if ((nameiop == NAMEI_CREATE \|\| nameiop == NAMEI_RENAME) &&
	477	dp->i_nlink != 0) {
	478	/*
	479	* Access for write is interpreted as allowing
	480	* creation of files in the directory.
	481	*/
	482	if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td)) != 0)
	483	return (error);
	484	/*
	485	* Return an indication of where the new directory
	486	* entry should be put. If we didn't find a slot,
	487	* then set dp->i_count to 0 indicating
	488	* that the new slot belongs at the end of the
	489	* directory. If we found a slot, then the new entry
	490	* can be put in the range from dp->i_offset to
	491	* dp->i_offset + dp->i_count.
	492	*/
	493	if (slotstatus == NONE) {
	494	dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
	495	dp->i_count = 0;
	496	enduseful = dp->i_offset;
	497	} else {
	498	dp->i_offset = slotoffset;
	499	dp->i_count = slotsize;
	500	if (enduseful < slotoffset + slotsize)
	501	enduseful = slotoffset + slotsize;
	502	}
	503	dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
	504	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	505	/*
	506	* We return with the directory locked, so that
	507	* the parameters we set up above will still be
	508	* valid if we actually decide to do a direnter().
	509	* We return ni_vp == NULL to indicate that the entry
	510	* does not currently exist; we leave a pointer to
	511	* the (locked) directory inode in ndp->ni_dvp.
	512	* The pathname buffer is saved so that the name
	513	* can be obtained later.
	514	*
	515	* NB - if the directory is unlocked, then this
	516	* information cannot be used.
	517	*/
	518	if (!lockparent)
	519	VOP_UNLOCK(vdp, 0);
	520	return (EJUSTRETURN);
	521	}
	522	return (ENOENT);
	523
	524	found:
	525	if (numdirpasses == 2)
	526	gd->gd_nchstats->ncs_pass2++;
	527	/*
	528	* Check that directory length properly reflects presence
	529	* of this entry.
	530	*/
	531	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len)
	532	> dp->i_size) {
	533	ext2_dirbad(dp, dp->i_offset, "i_size too small");
	534	dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len);
	535	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	536	}
	537	brelse(bp);
	538
	539	/*
	540	* Found component in pathname.
	541	* If the final component of path name, save information
	542	* in the cache as to where the entry was found.
	543	*/
	544	if (nameiop == NAMEI_LOOKUP)
	545	dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
	546
	547	/*
	548	* If deleting, and at end of pathname, return
	549	* parameters which can be used to remove file.
	550	* If the wantparent flag isn't set, we return only
	551	* the directory (in ndp->ni_dvp), otherwise we go
	552	* on and lock the inode, being careful with ".".
	553	*/
	554	if (nameiop == NAMEI_DELETE) {
	555	/*
	556	* Write access to directory required to delete files.
	557	*/
	558	if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td)) != 0)
	559	return (error);
	560	/*
	561	* Return pointer to current entry in dp->i_offset,
	562	* and distance past previous entry (if there
	563	* is a previous entry in this block) in dp->i_count.
	564	* Save directory inode pointer in ndp->ni_dvp for dirremove().
	565	*/
	566	if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
	567	dp->i_count = 0;
	568	else
	569	dp->i_count = dp->i_offset - prevoff;
	570	if (dp->i_number == dp->i_ino) {
	571	vref(vdp);
	572	*vpp = vdp;
	573	return (0);
	574	}
	575	if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
	576	return (error);
	577	/*
	578	* If directory is "sticky", then user must own
	579	* the directory, or the file in it, else she
	580	* may not delete it (unless she's root). This
	581	* implements append-only directories.
	582	*/
	583	if ((dp->i_mode & ISVTX) &&
	584	cred->cr_uid != 0 &&
	585	cred->cr_uid != dp->i_uid &&
	586	VTOI(tdp)->i_uid != cred->cr_uid) {
	587	vput(tdp);
	588	return (EPERM);
	589	}
	590	*vpp = tdp;
	591	if (!lockparent)
	592	VOP_UNLOCK(vdp, 0);
	593	return (0);
	594	}
	595
	596	/*
	597	* If rewriting (RENAME), return the inode and the
	598	* information required to rewrite the present directory
	599	* Must get inode of directory entry to verify it's a
	600	* regular file, or empty directory.
	601	*/
	602	if (nameiop == NAMEI_RENAME && wantparent) {
	603	if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td)) != 0)
	604	return (error);
	605	/*
	606	* Careful about locking second inode.
	607	* This can only occur if the target is ".".
	608	*/
	609	if (dp->i_number == dp->i_ino)
	610	return (EISDIR);
	611	if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
	612	return (error);
	613	*vpp = tdp;
	614	if (!lockparent)
	615	VOP_UNLOCK(vdp, 0);
	616	return (0);
	617	}
	618
	619	/*
	620	* Step through the translation in the name. We do not `vput' the
	621	* directory because we may need it again if a symbolic link
	622	* is relative to the current directory. Instead we save it
	623	* unlocked as "pdp". We must get the target inode before unlocking
	624	* the directory to insure that the inode will not be removed
	625	* before we get it. We prevent deadlock by always fetching
	626	* inodes from the root, moving down the directory tree. Thus
	627	* when following backward pointers ".." we must unlock the
	628	* parent directory before getting the requested directory.
	629	* There is a potential race condition here if both the current
	630	* and parent directories are removed before the VFS_VGET for the
	631	* inode associated with ".." returns. We hope that this occurs
	632	* infrequently since we cannot avoid this race condition without
	633	* implementing a sophisticated deadlock detection algorithm.
	634	* Note also that this simple deadlock detection scheme will not
	635	* work if the file system has any hard links other than ".."
	636	* that point backwards in the directory structure.
	637	*/
	638	pdp = vdp;
	639	if (flags & CNP_ISDOTDOT) {
	640	VOP_UNLOCK(pdp, 0); /* race to get the inode */
	641	if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
	642	vn_lock(pdp, LK_EXCLUSIVE \| LK_RETRY);
	643	return (error);
	644	}
	645	if (lockparent && (error = vn_lock(pdp, LK_EXCLUSIVE))) {
	646	vput(tdp);
	647	return (error);
	648	}
	649	*vpp = tdp;
	650	} else if (dp->i_number == dp->i_ino) {
	651	vref(vdp); /* we want ourself, ie "." */
	652	*vpp = vdp;
	653	} else {
	654	if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
	655	return (error);
	656	if (!lockparent)
	657	VOP_UNLOCK(pdp, 0);
	658	*vpp = tdp;
	659	}
	660	return (0);
	661	}
	662
	663	void
	664	ext2_dirbad(struct inode ip, doff_t offset, char how)
	665	{
	666	struct mount *mp;
	667
	668	mp = ITOV(ip)->v_mount;
	669	printf("%s: bad dir ino %lu at offset %ld: %s\n",
	670	mp->mnt_stat.f_mntfromname, (u_long)ip->i_number,
	671	(long)offset, how);
	672	if ((mp->mnt_flag & MNT_RDONLY) == 0)
	673	panic("ufs_dirbad: bad dir");
	674	}
	675
	676	/*
	677	* Do consistency checking on a directory entry:
	678	* record length must be multiple of 4
	679	* entry must fit in rest of its DIRBLKSIZ block
	680	* record must be large enough to contain entry
	681	* name is not longer than MAXNAMLEN
	682	* name must be as long as advertised, and null terminated
	683	*/
	684	/*
	685	* changed so that it conforms to ext2_check_dir_entry
	686	*/
	687	static int
	688	ext2_dirbadentry(struct vnode dp, struct ext2_dir_entry_2 de,
	689	int entryoffsetinblock)
	690	{
	691	int DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize;
	692
	693	char * error_msg = NULL;
	694
	695	if (de->rec_len < EXT2_DIR_REC_LEN(1))
	696	error_msg = "rec_len is smaller than minimal";
	697	else if (de->rec_len % 4 != 0)
	698	error_msg = "rec_len % 4 != 0";
	699	else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len))
	700	error_msg = "reclen is too small for name_len";
	701	else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ)
	702	error_msg = "directory entry across blocks";
	703	/* else LATER
	704	if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
	705	error_msg = "inode out of bounds";
	706	*/
	707
	708	if (error_msg != NULL) {
	709	printf("bad directory entry: %s\n", error_msg);
	710	printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n",
	711	entryoffsetinblock, (unsigned long)de->inode,
	712	de->rec_len, de->name_len);
	713	}
	714	return error_msg == NULL ? 0 : 1;
	715	}
	716
	717	/*
	718	* Write a directory entry after a call to namei, using the parameters
	719	* that it left in the directory inode. The argument ip is the inode which
	720	* the new directory entry will refer to. Dvp is a pointer to the directory
	721	* to be written, which was left locked by namei. Remaining parameters
	722	* (dp->i_offset, dp->i_count) indicate how the space for the new
	723	* entry is to be obtained.
	724	*/
	725	int
	726	ext2_direnter(struct inode ip, struct vnode dvp, struct componentname *cnp)
	727	{
	728	struct ext2_dir_entry_2 ep, nep;
	729	struct inode *dp;
	730	struct buf *bp;
	731	struct ext2_dir_entry_2 newdir;
	732	struct iovec aiov;
	733	struct uio auio;
	734	u_int dsize;
	735	int error, loc, newentrysize, spacefree;
	736	char *dirbuf;
	737	int DIRBLKSIZ = ip->i_e2fs->s_blocksize;
	738
	739
	740	dp = VTOI(dvp);
	741	newdir.inode = ip->i_number;
	742	newdir.name_len = cnp->cn_namelen;
	743	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
	744	EXT2_FEATURE_INCOMPAT_FILETYPE))
	745	newdir.file_type = DTTOFT(IFTODT(ip->i_mode));
	746	else
	747	newdir.file_type = EXT2_FT_UNKNOWN;
	748	bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1);
	749	newentrysize = EXT2_DIR_REC_LEN(newdir.name_len);
	750	if (dp->i_count == 0) {
	751	/*
	752	* If dp->i_count is 0, then namei could find no
	753	* space in the directory. Here, dp->i_offset will
	754	* be on a directory block boundary and we will write the
	755	* new entry into a fresh block.
	756	*/
	757	if (dp->i_offset & (DIRBLKSIZ - 1))
	758	panic("ext2_direnter: newblk");
	759	auio.uio_offset = dp->i_offset;
	760	newdir.rec_len = DIRBLKSIZ;
	761	auio.uio_resid = newentrysize;
	762	aiov.iov_len = newentrysize;
	763	aiov.iov_base = (caddr_t)&newdir;
	764	auio.uio_iov = &aiov;
	765	auio.uio_iovcnt = 1;
	766	auio.uio_rw = UIO_WRITE;
	767	auio.uio_segflg = UIO_SYSSPACE;
	768	auio.uio_td = NULL;
	769	error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
	770	if (DIRBLKSIZ >
	771	VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
	772	/* XXX should grow with balloc() */
	773	panic("ext2_direnter: frag size");
	774	else if (!error) {
	775	dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
	776	dp->i_flag \|= IN_CHANGE;
	777	}
	778	return (error);
	779	}
	780
	781	/*
	782	* If dp->i_count is non-zero, then namei found space
	783	* for the new entry in the range dp->i_offset to
	784	* dp->i_offset + dp->i_count in the directory.
	785	* To use this space, we may have to compact the entries located
	786	* there, by copying them together towards the beginning of the
	787	* block, leaving the free space in one usable chunk at the end.
	788	*/
	789
	790	/*
	791	* Increase size of directory if entry eats into new space.
	792	* This should never push the size past a new multiple of
	793	* DIRBLKSIZE.
	794	*
	795	* N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	796	*/
	797	if (dp->i_offset + dp->i_count > dp->i_size)
	798	dp->i_size = dp->i_offset + dp->i_count;
	799	/*
	800	* Get the block containing the space for the new directory entry.
	801	*/
	802	if ((error = EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0)
	803	return (error);
	804	/*
	805	* Find space for the new entry. In the simple case, the entry at
	806	* offset base will have the space. If it does not, then namei
	807	* arranged that compacting the region dp->i_offset to
	808	* dp->i_offset + dp->i_count would yield the
	809	* space.
	810	*/
	811	ep = (struct ext2_dir_entry_2 *)dirbuf;
	812	dsize = EXT2_DIR_REC_LEN(ep->name_len);
	813	spacefree = ep->rec_len - dsize;
	814	for (loc = ep->rec_len; loc < dp->i_count; ) {
	815	nep = (struct ext2_dir_entry_2 *)(dirbuf + loc);
	816	if (ep->inode) {
	817	/* trim the existing slot */
	818	ep->rec_len = dsize;
	819	ep = (struct ext2_dir_entry_2 )((char )ep + dsize);
	820	} else {
	821	/* overwrite; nothing there; header is ours */
	822	spacefree += dsize;
	823	}
	824	dsize = EXT2_DIR_REC_LEN(nep->name_len);
	825	spacefree += nep->rec_len - dsize;
	826	loc += nep->rec_len;
	827	bcopy((caddr_t)nep, (caddr_t)ep, dsize);
	828	}
	829	/*
	830	* Update the pointer fields in the previous entry (if any),
	831	* copy in the new entry, and write out the block.
	832	*/
	833	if (ep->inode == 0) {
	834	if (spacefree + dsize < newentrysize)
	835	panic("ext2_direnter: compact1");
	836	newdir.rec_len = spacefree + dsize;
	837	} else {
	838	if (spacefree < newentrysize)
	839	panic("ext2_direnter: compact2");
	840	newdir.rec_len = spacefree;
	841	ep->rec_len = dsize;
	842	ep = (struct ext2_dir_entry_2 )((char )ep + dsize);
	843	}
	844	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
	845	error = bwrite(bp);
	846	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	847	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
	848	error = EXT2_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
	849	cnp->cn_cred, cnp->cn_td);
	850	return (error);
	851	}
	852
	853	/*
	854	* Remove a directory entry after a call to namei, using
	855	* the parameters which it left in the directory inode. The entry
	856	* dp->i_offset contains the offset into the directory of the
	857	* entry to be eliminated. The dp->i_count field contains the
	858	* size of the previous record in the directory. If this
	859	* is 0, the first entry is being deleted, so we need only
	860	* zero the inode number to mark the entry as free. If the
	861	* entry is not the first in the directory, we must reclaim
	862	* the space of the now empty record by adding the record size
	863	* to the size of the previous entry.
	864	*/
	865	int
	866	ext2_dirremove(struct vnode dvp, struct componentname cnp)
	867	{
	868	struct inode *dp;
	869	struct ext2_dir_entry_2 *ep;
	870	struct buf *bp;
	871	int error;
	872
	873	dp = VTOI(dvp);
	874	if (dp->i_count == 0) {
	875	/*
	876	* First entry in block: set d_ino to zero.
	877	*/
	878	if ((error =
	879	EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0)
	880	return (error);
	881	ep->inode = 0;
	882	error = bwrite(bp);
	883	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	884	return (error);
	885	}
	886	/*
	887	* Collapse new free space into previous entry.
	888	*/
	889	if ((error = EXT2_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
	890	(char **)&ep, &bp)) != 0)
	891	return (error);
	892	ep->rec_len += dp->i_reclen;
	893	error = bwrite(bp);
	894	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	895	return (error);
	896	}
	897
	898	/*
	899	* Rewrite an existing directory entry to point at the inode
	900	* supplied. The parameters describing the directory entry are
	901	* set up by a call to namei.
	902	*/
	903	int
	904	ext2_dirrewrite(struct inode dp, struct inode ip, struct componentname *cnp)
	905	{
	906	struct buf *bp;
	907	struct ext2_dir_entry_2 *ep;
	908	struct vnode *vdp = ITOV(dp);
	909	int error;
	910
	911	if ((error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0)
	912	return (error);
	913	ep->inode = ip->i_number;
	914	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
	915	EXT2_FEATURE_INCOMPAT_FILETYPE))
	916	ep->file_type = DTTOFT(IFTODT(ip->i_mode));
	917	else
	918	ep->file_type = EXT2_FT_UNKNOWN;
	919	error = bwrite(bp);
	920	dp->i_flag \|= IN_CHANGE \| IN_UPDATE;
	921	return (error);
	922	}
	923
	924	/*
	925	* Check if a directory is empty or not.
	926	* Inode supplied must be locked.
	927	*
	928	* Using a struct dirtemplate here is not precisely
	929	* what we want, but better than using a struct direct.
	930	*
	931	* NB: does not handle corrupted directories.
	932	*/
	933	int
	934	ext2_dirempty(struct inode ip, ino_t parentino, struct ucred cred)
	935	{
	936	off_t off;
	937	struct dirtemplate dbuf;
	938	struct ext2_dir_entry_2 dp = (struct ext2_dir_entry_2 )&dbuf;
	939	int error, count, namlen;
	940
	941	#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
	942
	943	for (off = 0; off < ip->i_size; off += dp->rec_len) {
	944	error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
	945	UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL);
	946	/*
	947	* Since we read MINDIRSIZ, residual must
	948	* be 0 unless we're at end of file.
	949	*/
	950	if (error \|\| count != 0)
	951	return (0);
	952	/* avoid infinite loops */
	953	if (dp->rec_len == 0)
	954	return (0);
	955	/* skip empty entries */
	956	if (dp->inode == 0)
	957	continue;
	958	/* accept only "." and ".." */
	959	namlen = dp->name_len;
	960	if (namlen > 2)
	961	return (0);
	962	if (dp->name[0] != '.')
	963	return (0);
	964	/*
	965	* At this point namlen must be 1 or 2.
	966	* 1 implies ".", 2 implies ".." if second
	967	* char is also "."
	968	*/
	969	if (namlen == 1)
	970	continue;
	971	if (dp->name[1] == '.' && dp->inode == parentino)
	972	continue;
	973	return (0);
	974	}
	975	return (1);
	976	}
	977
	978	/*
	979	* Check if source directory is in the path of the target directory.
	980	* Target is supplied locked, source is unlocked.
	981	* The target is always vput before returning.
	982	*/
	983	int
	984	ext2_checkpath(struct inode source, struct inode target, struct ucred *cred)
	985	{
	986	struct vnode *vp;
	987	int error, rootino, namlen;
	988	struct dirtemplate dirbuf;
	989
	990	vp = ITOV(target);
	991	if (target->i_number == source->i_number) {
	992	error = EEXIST;
	993	goto out;
	994	}
	995	rootino = ROOTINO;
	996	error = 0;
	997	if (target->i_number == rootino)
	998	goto out;
	999
	1000	for (;;) {
	1001	if (vp->v_type != VDIR) {
	1002	error = ENOTDIR;
	1003	break;
	1004	}
	1005	error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
	1006	sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
	1007	IO_NODELOCKED, cred, (int *)0, NULL);
	1008	if (error != 0)
	1009	break;
	1010	namlen = dirbuf.dotdot_type; /* like ufs little-endian */
	1011	if (namlen != 2 \|\|
	1012	dirbuf.dotdot_name[0] != '.' \|\|
	1013	dirbuf.dotdot_name[1] != '.') {
	1014	error = ENOTDIR;
	1015	break;
	1016	}
	1017	if (dirbuf.dotdot_ino == source->i_number) {
	1018	error = EINVAL;
	1019	break;
	1020	}
	1021	if (dirbuf.dotdot_ino == rootino)
	1022	break;
	1023	vput(vp);
	1024	if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp)) != 0) {
	1025	vp = NULL;
	1026	break;
	1027	}
	1028	}
	1029
	1030	out:
	1031	if (error == ENOTDIR)
	1032	printf("checkpath: .. not a directory\n");
	1033	if (vp != NULL)
	1034	vput(vp);
	1035	return (error);
	1036	}