gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1993, 1995 Jan-Simon Pendry
	3	* Copyright (c) 1993, 1995
	4	* The Regents of the University of California. All rights reserved.
	5	*
	6	* This code is derived from software contributed to Berkeley by
	7	* Jan-Simon Pendry.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	* 1. Redistributions of source code must retain the above copyright
	13	* notice, this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 3. All advertising materials mentioning features or use of this software
	18	* must display the following acknowledgement:
	19	* This product includes software developed by the University of
	20	* California, Berkeley and its contributors.
	21	* 4. Neither the name of the University nor the names of its contributors
	22	* may be used to endorse or promote products derived from this software
	23	* without specific prior written permission.
	24	*
	25	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	26	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	27	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	28	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	29	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	30	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	31	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	32	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	33	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	34	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	35	* SUCH DAMAGE.
	36	*
	37	* @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
	38	*
	39	* $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
	40	* $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.2 2003/06/17 04:28:42 dillon Exp $
	41	*/
	42
	43	/*
	44	* procfs vnode interface
	45	*/
	46
	47	#include <sys/param.h>
	48	#include <sys/systm.h>
	49	#include <sys/time.h>
	50	#include <sys/kernel.h>
	51	#include <sys/lock.h>
	52	#include <sys/fcntl.h>
	53	#include <sys/proc.h>
	54	#include <sys/signalvar.h>
	55	#include <sys/vnode.h>
	56	#include <sys/uio.h>
	57	#include <sys/mount.h>
	58	#include <sys/namei.h>
	59	#include <sys/dirent.h>
	60	#include <sys/malloc.h>
	61	#include <machine/reg.h>
	62	#include <vm/vm_zone.h>
	63	#include <miscfs/procfs/procfs.h>
	64	#include <sys/pioctl.h>
	65
	66	static int procfs_access __P((struct vop_access_args *));
	67	static int procfs_badop __P((void));
	68	static int procfs_bmap __P((struct vop_bmap_args *));
	69	static int procfs_close __P((struct vop_close_args *));
	70	static int procfs_getattr __P((struct vop_getattr_args *));
	71	static int procfs_inactive __P((struct vop_inactive_args *));
	72	static int procfs_ioctl __P((struct vop_ioctl_args *));
	73	static int procfs_lookup __P((struct vop_lookup_args *));
	74	static int procfs_open __P((struct vop_open_args *));
	75	static int procfs_print __P((struct vop_print_args *));
	76	static int procfs_readdir __P((struct vop_readdir_args *));
	77	static int procfs_readlink __P((struct vop_readlink_args *));
	78	static int procfs_reclaim __P((struct vop_reclaim_args *));
	79	static int procfs_setattr __P((struct vop_setattr_args *));
	80
	81	/*
	82	* This is a list of the valid names in the
	83	* process-specific sub-directories. It is
	84	* used in procfs_lookup and procfs_readdir
	85	*/
	86	static struct proc_target {
	87	u_char pt_type;
	88	u_char pt_namlen;
	89	char *pt_name;
	90	pfstype pt_pfstype;
	91	int (pt_valid) __P((struct proc p));
	92	} proc_targets[] = {
	93	#define N(s) sizeof(s)-1, s
	94	/* name type validp */
	95	{ DT_DIR, N("."), Pproc, NULL },
	96	{ DT_DIR, N(".."), Proot, NULL },
	97	{ DT_REG, N("mem"), Pmem, NULL },
	98	{ DT_REG, N("regs"), Pregs, procfs_validregs },
	99	{ DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
	100	{ DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
	101	{ DT_REG, N("ctl"), Pctl, NULL },
	102	{ DT_REG, N("status"), Pstatus, NULL },
	103	{ DT_REG, N("note"), Pnote, NULL },
	104	{ DT_REG, N("notepg"), Pnotepg, NULL },
	105	{ DT_REG, N("map"), Pmap, procfs_validmap },
	106	{ DT_REG, N("etype"), Ptype, procfs_validtype },
	107	{ DT_REG, N("cmdline"), Pcmdline, NULL },
	108	{ DT_REG, N("rlimit"), Prlimit, NULL },
	109	{ DT_LNK, N("file"), Pfile, NULL },
	110	#undef N
	111	};
	112	static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
	113
	114	static pid_t atopid __P((const char *, u_int));
	115
	116	/*
	117	* set things up for doing i/o on
	118	* the pfsnode (vp). (vp) is locked
	119	* on entry, and should be left locked
	120	* on exit.
	121	*
	122	* for procfs we don't need to do anything
	123	* in particular for i/o. all that is done
	124	* is to support exclusive open on process
	125	* memory images.
	126	*/
	127	static int
	128	procfs_open(ap)
	129	struct vop_open_args /* {
	130	struct vnode *a_vp;
	131	int a_mode;
	132	struct ucred *a_cred;
	133	struct proc *a_p;
	134	} / ap;
	135	{
	136	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	137	struct proc p1, p2;
	138
	139	p2 = PFIND(pfs->pfs_pid);
	140	if (p2 == NULL)
	141	return (ENOENT);
	142	if (pfs->pfs_pid && !PRISON_CHECK(ap->a_p, p2))
	143	return (ENOENT);
	144
	145	switch (pfs->pfs_type) {
	146	case Pmem:
	147	if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) \|\|
	148	((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
	149	return (EBUSY);
	150
	151	p1 = ap->a_p;
	152	/* Can't trace a process that's currently exec'ing. */
	153	if ((p2->p_flag & P_INEXEC) != 0)
	154	return EAGAIN;
	155	if (!CHECKIO(p1, p2) \|\| p_trespass(p1, p2))
	156	return (EPERM);
	157
	158	if (ap->a_mode & FWRITE)
	159	pfs->pfs_flags = ap->a_mode & (FWRITE\|O_EXCL);
	160
	161	return (0);
	162
	163	default:
	164	break;
	165	}
	166
	167	return (0);
	168	}
	169
	170	/*
	171	* close the pfsnode (vp) after doing i/o.
	172	* (vp) is not locked on entry or exit.
	173	*
	174	* nothing to do for procfs other than undo
	175	* any exclusive open flag (see _open above).
	176	*/
	177	static int
	178	procfs_close(ap)
	179	struct vop_close_args /* {
	180	struct vnode *a_vp;
	181	int a_fflag;
	182	struct ucred *a_cred;
	183	struct proc *a_p;
	184	} / ap;
	185	{
	186	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	187	struct proc *p;
	188
	189	switch (pfs->pfs_type) {
	190	case Pmem:
	191	if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
	192	pfs->pfs_flags &= ~(FWRITE\|O_EXCL);
	193	/*
	194	* This rather complicated-looking code is trying to
	195	* determine if this was the last close on this particular
	196	* vnode. While one would expect v_usecount to be 1 at
	197	* that point, it seems that (according to John Dyson)
	198	* the VM system will bump up the usecount. So: if the
	199	* usecount is 2, and VOBJBUF is set, then this is really
	200	* the last close. Otherwise, if the usecount is < 2
	201	* then it is definitely the last close.
	202	* If this is the last close, then it checks to see if
	203	* the target process has PF_LINGER set in p_pfsflags,
	204	* if this is not the case, then the process' stop flags
	205	* are cleared, and the process is woken up. This is
	206	* to help prevent the case where a process has been
	207	* told to stop on an event, but then the requesting process
	208	* has gone away or forgotten about it.
	209	*/
	210	if ((ap->a_vp->v_usecount < 2)
	211	&& (p = pfind(pfs->pfs_pid))
	212	&& !(p->p_pfsflags & PF_LINGER)) {
	213	p->p_stops = 0;
	214	p->p_step = 0;
	215	wakeup(&p->p_step);
	216	}
	217	break;
	218	default:
	219	break;
	220	}
	221
	222	return (0);
	223	}
	224
	225	/*
	226	* do an ioctl operation on a pfsnode (vp).
	227	* (vp) is not locked on entry or exit.
	228	*/
	229	static int
	230	procfs_ioctl(ap)
	231	struct vop_ioctl_args *ap;
	232	{
	233	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	234	struct proc procp, p;
	235	int error;
	236	int signo;
	237	struct procfs_status *psp;
	238	unsigned char flags;
	239
	240	p = ap->a_p;
	241	procp = pfind(pfs->pfs_pid);
	242	if (procp == NULL) {
	243	return ENOTTY;
	244	}
	245
	246	/* Can't trace a process that's currently exec'ing. */
	247	if ((procp->p_flag & P_INEXEC) != 0)
	248	return EAGAIN;
	249	if (!CHECKIO(p, procp) \|\| p_trespass(p, procp))
	250	return EPERM;
	251
	252	switch (ap->a_command) {
	253	case PIOCBIS:
	254	procp->p_stops \|= (unsigned int)ap->a_data;
	255	break;
	256	case PIOCBIC:
	257	procp->p_stops &= ~(unsigned int)ap->a_data;
	258	break;
	259	case PIOCSFL:
	260	/*
	261	* NFLAGS is "non-suser_xxx flags" -- currently, only
	262	* PFS_ISUGID ("ignore set u/g id");
	263	*/
	264	#define NFLAGS (PF_ISUGID)
	265	flags = (unsigned char)(unsigned int)ap->a_data;
	266	if (flags & NFLAGS && (error = suser(p)))
	267	return error;
	268	procp->p_pfsflags = flags;
	269	break;
	270	case PIOCGFL:
	271	(unsigned int)ap->a_data = (unsigned int)procp->p_pfsflags;
	272	break;
	273	case PIOCSTATUS:
	274	psp = (struct procfs_status *)ap->a_data;
	275	psp->state = (procp->p_step == 0);
	276	psp->flags = procp->p_pfsflags;
	277	psp->events = procp->p_stops;
	278	if (procp->p_step) {
	279	psp->why = procp->p_stype;
	280	psp->val = procp->p_xstat;
	281	} else {
	282	psp->why = psp->val = 0; /* Not defined values */
	283	}
	284	break;
	285	case PIOCWAIT:
	286	psp = (struct procfs_status *)ap->a_data;
	287	if (procp->p_step == 0) {
	288	error = tsleep(&procp->p_stype, PWAIT \| PCATCH, "piocwait", 0);
	289	if (error)
	290	return error;
	291	}
	292	psp->state = 1; /* It stopped */
	293	psp->flags = procp->p_pfsflags;
	294	psp->events = procp->p_stops;
	295	psp->why = procp->p_stype; /* why it stopped */
	296	psp->val = procp->p_xstat; /* any extra info */
	297	break;
	298	case PIOCCONT: /* Restart a proc */
	299	if (procp->p_step == 0)
	300	return EINVAL; /* Can only start a stopped process */
	301	if ((signo = (int)ap->a_data) != 0) {
	302	if (signo >= NSIG \|\| signo <= 0)
	303	return EINVAL;
	304	psignal(procp, signo);
	305	}
	306	procp->p_step = 0;
	307	wakeup(&procp->p_step);
	308	break;
	309	default:
	310	return (ENOTTY);
	311	}
	312	return 0;
	313	}
	314
	315	/*
	316	* do block mapping for pfsnode (vp).
	317	* since we don't use the buffer cache
	318	* for procfs this function should never
	319	* be called. in any case, it's not clear
	320	* what part of the kernel ever makes use
	321	* of this function. for sanity, this is the
	322	* usual no-op bmap, although returning
	323	* (EIO) would be a reasonable alternative.
	324	*/
	325	static int
	326	procfs_bmap(ap)
	327	struct vop_bmap_args /* {
	328	struct vnode *a_vp;
	329	daddr_t a_bn;
	330	struct vnode **a_vpp;
	331	daddr_t *a_bnp;
	332	int *a_runp;
	333	} / ap;
	334	{
	335
	336	if (ap->a_vpp != NULL)
	337	*ap->a_vpp = ap->a_vp;
	338	if (ap->a_bnp != NULL)
	339	*ap->a_bnp = ap->a_bn;
	340	if (ap->a_runp != NULL)
	341	*ap->a_runp = 0;
	342	return (0);
	343	}
	344
	345	/*
	346	* procfs_inactive is called when the pfsnode
	347	* is vrele'd and the reference count goes
	348	* to zero. (vp) will be on the vnode free
	349	* list, so to get it back vget() must be
	350	* used.
	351	*
	352	* (vp) is locked on entry, but must be unlocked on exit.
	353	*/
	354	static int
	355	procfs_inactive(ap)
	356	struct vop_inactive_args /* {
	357	struct vnode *a_vp;
	358	} / ap;
	359	{
	360	struct vnode *vp = ap->a_vp;
	361
	362	VOP_UNLOCK(vp, 0, ap->a_p);
	363
	364	return (0);
	365	}
	366
	367	/*
	368	* _reclaim is called when getnewvnode()
	369	* wants to make use of an entry on the vnode
	370	* free list. at this time the filesystem needs
	371	* to free any private data and remove the node
	372	* from any private lists.
	373	*/
	374	static int
	375	procfs_reclaim(ap)
	376	struct vop_reclaim_args /* {
	377	struct vnode *a_vp;
	378	} / ap;
	379	{
	380
	381	return (procfs_freevp(ap->a_vp));
	382	}
	383
	384	/*
	385	* _print is used for debugging.
	386	* just print a readable description
	387	* of (vp).
	388	*/
	389	static int
	390	procfs_print(ap)
	391	struct vop_print_args /* {
	392	struct vnode *a_vp;
	393	} / ap;
	394	{
	395	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	396
	397	printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
	398	pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
	399	return (0);
	400	}
	401
	/*
	 * Generic entry point for unsupported operations.
	 * Takes no arguments and always fails with EIO.
	 */
	static int
	procfs_badop(void)
	{

		return (EIO);
	}
	411
	412	/*
	413	* Invent attributes for pfsnode (vp) and store
	414	* them in (vap).
	415	* Directories lengths are returned as zero since
	416	* any real length would require the genuine size
	417	* to be computed, and nothing cares anyway.
	418	*
	419	* this is relatively minimal for procfs.
	420	*/
	421	static int
	422	procfs_getattr(ap)
	423	struct vop_getattr_args /* {
	424	struct vnode *a_vp;
	425	struct vattr *a_vap;
	426	struct ucred *a_cred;
	427	struct proc *a_p;
	428	} / ap;
	429	{
	430	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	431	struct vattr *vap = ap->a_vap;
	432	struct proc *procp;
	433	int error;
	434
	435	/*
	436	* First make sure that the process and its credentials
	437	* still exist.
	438	*/
	439	switch (pfs->pfs_type) {
	440	case Proot:
	441	case Pcurproc:
	442	procp = 0;
	443	break;
	444
	445	default:
	446	procp = PFIND(pfs->pfs_pid);
	447	if (procp == NULL \|\| procp->p_cred == NULL \|\|
	448	procp->p_ucred == NULL)
	449	return (ENOENT);
	450	}
	451
	452	error = 0;
	453
	454	/* start by zeroing out the attributes */
	455	VATTR_NULL(vap);
	456
	457	/* next do all the common fields */
	458	vap->va_type = ap->a_vp->v_type;
	459	vap->va_mode = pfs->pfs_mode;
	460	vap->va_fileid = pfs->pfs_fileno;
	461	vap->va_flags = 0;
	462	vap->va_blocksize = PAGE_SIZE;
	463	vap->va_bytes = vap->va_size = 0;
	464	vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	465
	466	/*
	467	* Make all times be current TOD.
	468	* It would be possible to get the process start
	469	* time from the p_stat structure, but there's
	470	* no "file creation" time stamp anyway, and the
	471	* p_stat structure is not addressible if u. gets
	472	* swapped out for that process.
	473	*/
	474	nanotime(&vap->va_ctime);
	475	vap->va_atime = vap->va_mtime = vap->va_ctime;
	476
	477	/*
	478	* If the process has exercised some setuid or setgid
	479	* privilege, then rip away read/write permission so
	480	* that only root can gain access.
	481	*/
	482	switch (pfs->pfs_type) {
	483	case Pctl:
	484	case Pregs:
	485	case Pfpregs:
	486	case Pdbregs:
	487	case Pmem:
	488	if (procp->p_flag & P_SUGID)
	489	vap->va_mode &= ~((VREAD\|VWRITE)\|
	490	((VREAD\|VWRITE)>>3)\|
	491	((VREAD\|VWRITE)>>6));
	492	break;
	493	default:
	494	break;
	495	}
	496
	497	/*
	498	* now do the object specific fields
	499	*
	500	* The size could be set from struct reg, but it's hardly
	501	* worth the trouble, and it puts some (potentially) machine
	502	* dependent data into this machine-independent code. If it
	503	* becomes important then this function should break out into
	504	* a per-file stat function in the corresponding .c file.
	505	*/
	506
	507	vap->va_nlink = 1;
	508	if (procp) {
	509	vap->va_uid = procp->p_ucred->cr_uid;
	510	vap->va_gid = procp->p_ucred->cr_gid;
	511	}
	512
	513	switch (pfs->pfs_type) {
	514	case Proot:
	515	/*
	516	* Set nlink to 1 to tell fts(3) we don't actually know.
	517	*/
	518	vap->va_nlink = 1;
	519	vap->va_uid = 0;
	520	vap->va_gid = 0;
	521	vap->va_size = vap->va_bytes = DEV_BSIZE;
	522	break;
	523
	524	case Pcurproc: {
	525	char buf[16]; /* should be enough */
	526	vap->va_uid = 0;
	527	vap->va_gid = 0;
	528	vap->va_size = vap->va_bytes =
	529	snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
	530	break;
	531	}
	532
	533	case Pproc:
	534	vap->va_nlink = nproc_targets;
	535	vap->va_size = vap->va_bytes = DEV_BSIZE;
	536	break;
	537
	538	case Pfile: {
	539	char fullpath, freepath;
	540	error = textvp_fullpath(procp, &fullpath, &freepath);
	541	if (error == 0) {
	542	vap->va_size = strlen(fullpath);
	543	free(freepath, M_TEMP);
	544	} else {
	545	vap->va_size = sizeof("unknown") - 1;
	546	error = 0;
	547	}
	548	vap->va_bytes = vap->va_size;
	549	break;
	550	}
	551
	552	case Pmem:
	553	/*
	554	* If we denied owner access earlier, then we have to
	555	* change the owner to root - otherwise 'ps' and friends
	556	* will break even though they are setgid kmem. SIGH
	557	*/
	558	if (procp->p_flag & P_SUGID)
	559	vap->va_uid = 0;
	560	else
	561	vap->va_uid = procp->p_ucred->cr_uid;
	562	break;
	563
	564	case Pregs:
	565	vap->va_bytes = vap->va_size = sizeof(struct reg);
	566	break;
	567
	568	case Pfpregs:
	569	vap->va_bytes = vap->va_size = sizeof(struct fpreg);
	570	break;
	571
	572	case Pdbregs:
	573	vap->va_bytes = vap->va_size = sizeof(struct dbreg);
	574	break;
	575
	576	case Ptype:
	577	case Pmap:
	578	case Pctl:
	579	case Pstatus:
	580	case Pnote:
	581	case Pnotepg:
	582	case Pcmdline:
	583	case Prlimit:
	584	break;
	585
	586	default:
	587	panic("procfs_getattr");
	588	}
	589
	590	return (error);
	591	}
	592
	593	static int
	594	procfs_setattr(ap)
	595	struct vop_setattr_args /* {
	596	struct vnode *a_vp;
	597	struct vattr *a_vap;
	598	struct ucred *a_cred;
	599	struct proc *a_p;
	600	} / ap;
	601	{
	602
	603	if (ap->a_vap->va_flags != VNOVAL)
	604	return (EOPNOTSUPP);
	605
	606	/*
	607	* just fake out attribute setting
	608	* it's not good to generate an error
	609	* return, otherwise things like creat()
	610	* will fail when they try to set the
	611	* file length to 0. worse, this means
	612	* that echo $note > /proc/$pid/note will fail.
	613	*/
	614
	615	return (0);
	616	}
	617
	618	/*
	619	* implement access checking.
	620	*
	621	* something very similar to this code is duplicated
	622	* throughout the 4bsd kernel and should be moved
	623	* into kern/vfs_subr.c sometime.
	624	*
	625	* actually, the check for super-user is slightly
	626	* broken since it will allow read access to write-only
	627	* objects. this doesn't cause any particular trouble
	628	* but does mean that the i/o entry points need to check
	629	* that the operation really does make sense.
	630	*/
	631	static int
	632	procfs_access(ap)
	633	struct vop_access_args /* {
	634	struct vnode *a_vp;
	635	int a_mode;
	636	struct ucred *a_cred;
	637	struct proc *a_p;
	638	} / ap;
	639	{
	640	struct vattr *vap;
	641	struct vattr vattr;
	642	int error;
	643
	644	/*
	645	* If you're the super-user,
	646	* you always get access.
	647	*/
	648	if (ap->a_cred->cr_uid == 0)
	649	return (0);
	650
	651	vap = &vattr;
	652	error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p);
	653	if (error)
	654	return (error);
	655
	656	/*
	657	* Access check is based on only one of owner, group, public.
	658	* If not owner, then check group. If not a member of the
	659	* group, then check public access.
	660	*/
	661	if (ap->a_cred->cr_uid != vap->va_uid) {
	662	gid_t *gp;
	663	int i;
	664
	665	ap->a_mode >>= 3;
	666	gp = ap->a_cred->cr_groups;
	667	for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
	668	if (vap->va_gid == *gp)
	669	goto found;
	670	ap->a_mode >>= 3;
	671	found:
	672	;
	673	}
	674
	675	if ((vap->va_mode & ap->a_mode) == ap->a_mode)
	676	return (0);
	677
	678	return (EACCES);
	679	}
	680
	681	/*
	682	* lookup. this is incredibly complicated in the
	683	* general case, however for most pseudo-filesystems
	684	* very little needs to be done.
	685	*
	686	* unless you want to get a migraine, just make sure your
	687	* filesystem doesn't do any locking of its own. otherwise
	688	* read and inwardly digest ufs_lookup().
	689	*/
	690	static int
	691	procfs_lookup(ap)
	692	struct vop_lookup_args /* {
	693	struct vnode * a_dvp;
	694	struct vnode ** a_vpp;
	695	struct componentname * a_cnp;
	696	} / ap;
	697	{
	698	struct componentname *cnp = ap->a_cnp;
	699	struct vnode **vpp = ap->a_vpp;
	700	struct vnode *dvp = ap->a_dvp;
	701	char *pname = cnp->cn_nameptr;
	702	/* struct proc curp = cnp->cn_proc; /
	703	struct proc_target *pt;
	704	pid_t pid;
	705	struct pfsnode *pfs;
	706	struct proc *p;
	707	int i;
	708
	709	*vpp = NULL;
	710
	711	if (cnp->cn_nameiop == DELETE \|\| cnp->cn_nameiop == RENAME)
	712	return (EROFS);
	713
	714	if (cnp->cn_namelen == 1 && *pname == '.') {
	715	*vpp = dvp;
	716	VREF(dvp);
	717	/* vn_lock(dvp, LK_EXCLUSIVE \| LK_RETRY, curp); */
	718	return (0);
	719	}
	720
	721	pfs = VTOPFS(dvp);
	722	switch (pfs->pfs_type) {
	723	case Proot:
	724	if (cnp->cn_flags & ISDOTDOT)
	725	return (EIO);
	726
	727	if (CNEQ(cnp, "curproc", 7))
	728	return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
	729
	730	pid = atopid(pname, cnp->cn_namelen);
	731	if (pid == NO_PID)
	732	break;
	733
	734	p = PFIND(pid);
	735	if (p == NULL)
	736	break;
	737
	738	return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
	739
	740	case Pproc:
	741	if (cnp->cn_flags & ISDOTDOT)
	742	return (procfs_root(dvp->v_mount, vpp));
	743
	744	p = PFIND(pfs->pfs_pid);
	745	if (p == NULL)
	746	break;
	747
	748	for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
	749	if (cnp->cn_namelen == pt->pt_namlen &&
	750	bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
	751	(pt->pt_valid == NULL \|\| (*pt->pt_valid)(p)))
	752	goto found;
	753	}
	754	break;
	755	found:
	756	return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
	757	pt->pt_pfstype));
	758
	759	default:
	760	return (ENOTDIR);
	761	}
	762
	763	return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
	764	}
	765
	766	/*
	767	* Does this process have a text file?
	768	*/
	769	int
	770	procfs_validfile(p)
	771	struct proc *p;
	772	{
	773
	774	return (procfs_findtextvp(p) != NULLVP);
	775	}
	776
	777	/*
	778	* readdir() returns directory entries from pfsnode (vp).
	779	*
	780	* We generate just one directory entry at a time, as it would probably
	781	* not pay off to buffer several entries locally to save uiomove calls.
	782	*/
	783	static int
	784	procfs_readdir(ap)
	785	struct vop_readdir_args /* {
	786	struct vnode *a_vp;
	787	struct uio *a_uio;
	788	struct ucred *a_cred;
	789	int *a_eofflag;
	790	int *a_ncookies;
	791	u_long **a_cookies;
	792	} / ap;
	793	{
	794	struct uio *uio = ap->a_uio;
	795	struct dirent d;
	796	struct dirent *dp = &d;
	797	struct pfsnode *pfs;
	798	int count, error, i, off;
	799	static u_int delen;
	800
	801	if (!delen) {
	802
	803	d.d_namlen = PROCFS_NAMELEN;
	804	delen = GENERIC_DIRSIZ(&d);
	805	}
	806
	807	pfs = VTOPFS(ap->a_vp);
	808
	809	off = (int)uio->uio_offset;
	810	if (off != uio->uio_offset \|\| off < 0 \|\|
	811	off % delen != 0 \|\| uio->uio_resid < delen)
	812	return (EINVAL);
	813
	814	error = 0;
	815	count = 0;
	816	i = off / delen;
	817
	818	switch (pfs->pfs_type) {
	819	/*
	820	* this is for the process-specific sub-directories.
	821	* all that is needed to is copy out all the entries
	822	* from the procent[] table (top of this file).
	823	*/
	824	case Pproc: {
	825	struct proc *p;
	826	struct proc_target *pt;
	827
	828	p = PFIND(pfs->pfs_pid);
	829	if (p == NULL)
	830	break;
	831	if (!PRISON_CHECK(curproc, p))
	832	break;
	833
	834	for (pt = &proc_targets[i];
	835	uio->uio_resid >= delen && i < nproc_targets; pt++, i++) {
	836	if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
	837	continue;
	838
	839	dp->d_reclen = delen;
	840	dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
	841	dp->d_namlen = pt->pt_namlen;
	842	bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
	843	dp->d_type = pt->pt_type;
	844
	845	if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
	846	break;
	847	}
	848
	849	break;
	850	}
	851
	852	/*
	853	* this is for the root of the procfs filesystem
	854	* what is needed is a special entry for "curproc"
	855	* followed by an entry for each process on allproc
	856	#ifdef PROCFS_ZOMBIE
	857	* and zombproc.
	858	#endif
	859	*/
	860
	861	case Proot: {
	862	#ifdef PROCFS_ZOMBIE
	863	int doingzomb = 0;
	864	#endif
	865	int pcnt = 0;
	866	volatile struct proc *p = allproc.lh_first;
	867
	868	for (; p && uio->uio_resid >= delen; i++, pcnt++) {
	869	bzero((char *) dp, delen);
	870	dp->d_reclen = delen;
	871
	872	switch (i) {
	873	case 0: /* `.' */
	874	case 1: /* `..' */
	875	dp->d_fileno = PROCFS_FILENO(0, Proot);
	876	dp->d_namlen = i + 1;
	877	bcopy("..", dp->d_name, dp->d_namlen);
	878	dp->d_name[i + 1] = '\0';
	879	dp->d_type = DT_DIR;
	880	break;
	881
	882	case 2:
	883	dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
	884	dp->d_namlen = 7;
	885	bcopy("curproc", dp->d_name, 8);
	886	dp->d_type = DT_LNK;
	887	break;
	888
	889	default:
	890	while (pcnt < i) {
	891	p = p->p_list.le_next;
	892	if (!p)
	893	goto done;
	894	if (!PRISON_CHECK(curproc, p))
	895	continue;
	896	pcnt++;
	897	}
	898	while (!PRISON_CHECK(curproc, p)) {
	899	p = p->p_list.le_next;
	900	if (!p)
	901	goto done;
	902	}
	903	dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
	904	dp->d_namlen = sprintf(dp->d_name, "%ld",
	905	(long)p->p_pid);
	906	dp->d_type = DT_DIR;
	907	p = p->p_list.le_next;
	908	break;
	909	}
	910
	911	if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
	912	break;
	913	}
	914	done:
	915
	916	#ifdef PROCFS_ZOMBIE
	917	if (p == NULL && doingzomb == 0) {
	918	doingzomb = 1;
	919	p = zombproc.lh_first;
	920	goto again;
	921	}
	922	#endif
	923
	924	break;
	925
	926	}
	927
	928	default:
	929	error = ENOTDIR;
	930	break;
	931	}
	932
	933	uio->uio_offset = i * delen;
	934
	935	return (error);
	936	}
	937
	938	/*
	939	* readlink reads the link of `curproc' or `file'
	940	*/
	941	static int
	942	procfs_readlink(ap)
	943	struct vop_readlink_args *ap;
	944	{
	945	char buf[16]; /* should be enough */
	946	struct proc *procp;
	947	struct vnode *vp = ap->a_vp;
	948	struct pfsnode *pfs = VTOPFS(vp);
	949	char fullpath, freepath;
	950	int error, len;
	951
	952	switch (pfs->pfs_type) {
	953	case Pcurproc:
	954	if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
	955	return (EINVAL);
	956
	957	len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
	958
	959	return (uiomove(buf, len, ap->a_uio));
	960	/*
	961	* There _should_ be no way for an entire process to disappear
	962	* from under us...
	963	*/
	964	case Pfile:
	965	procp = PFIND(pfs->pfs_pid);
	966	if (procp == NULL \|\| procp->p_cred == NULL \|\|
	967	procp->p_ucred == NULL) {
	968	printf("procfs_readlink: pid %d disappeared\n",
	969	pfs->pfs_pid);
	970	return (uiomove("unknown", sizeof("unknown") - 1,
	971	ap->a_uio));
	972	}
	973	error = textvp_fullpath(procp, &fullpath, &freepath);
	974	if (error != 0)
	975	return (uiomove("unknown", sizeof("unknown") - 1,
	976	ap->a_uio));
	977	error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
	978	free(freepath, M_TEMP);
	979	return (error);
	980	default:
	981	return (EINVAL);
	982	}
	983	}
	984
	985	/*
	986	* convert decimal ascii to pid_t
	987	*/
	988	static pid_t
	989	atopid(b, len)
	990	const char *b;
	991	u_int len;
	992	{
	993	pid_t p = 0;
	994
	995	while (len--) {
	996	char c = *b++;
	997	if (c < '0' \|\| c > '9')
	998	return (NO_PID);
	999	p = 10 * p + (c - '0');
	1000	if (p > PID_MAX)
	1001	return (NO_PID);
	1002	}
	1003
	1004	return (p);
	1005	}
	1006
	1007	/*
	1008	* procfs vnode operations.
	1009	*/
	1010	vop_t **procfs_vnodeop_p;
	1011	static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
	1012	{ &vop_default_desc, (vop_t *) vop_defaultop },
	1013	{ &vop_access_desc, (vop_t *) procfs_access },
	1014	{ &vop_advlock_desc, (vop_t *) procfs_badop },
	1015	{ &vop_bmap_desc, (vop_t *) procfs_bmap },
	1016	{ &vop_close_desc, (vop_t *) procfs_close },
	1017	{ &vop_create_desc, (vop_t *) procfs_badop },
	1018	{ &vop_getattr_desc, (vop_t *) procfs_getattr },
	1019	{ &vop_inactive_desc, (vop_t *) procfs_inactive },
	1020	{ &vop_link_desc, (vop_t *) procfs_badop },
	1021	{ &vop_lookup_desc, (vop_t *) procfs_lookup },
	1022	{ &vop_mkdir_desc, (vop_t *) procfs_badop },
	1023	{ &vop_mknod_desc, (vop_t *) procfs_badop },
	1024	{ &vop_open_desc, (vop_t *) procfs_open },
	1025	{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
	1026	{ &vop_print_desc, (vop_t *) procfs_print },
	1027	{ &vop_read_desc, (vop_t *) procfs_rw },
	1028	{ &vop_readdir_desc, (vop_t *) procfs_readdir },
	1029	{ &vop_readlink_desc, (vop_t *) procfs_readlink },
	1030	{ &vop_reclaim_desc, (vop_t *) procfs_reclaim },
	1031	{ &vop_remove_desc, (vop_t *) procfs_badop },
	1032	{ &vop_rename_desc, (vop_t *) procfs_badop },
	1033	{ &vop_rmdir_desc, (vop_t *) procfs_badop },
	1034	{ &vop_setattr_desc, (vop_t *) procfs_setattr },
	1035	{ &vop_symlink_desc, (vop_t *) procfs_badop },
	1036	{ &vop_write_desc, (vop_t *) procfs_rw },
	1037	{ &vop_ioctl_desc, (vop_t *) procfs_ioctl },
	1038	{ NULL, NULL }
	1039	};
	1040	static struct vnodeopv_desc procfs_vnodeop_opv_desc =
	1041	{ &procfs_vnodeop_p, procfs_vnodeop_entries };
	1042
	1043	VNODEOP_SET(procfs_vnodeop_opv_desc);