gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1993, 1995 Jan-Simon Pendry
	3	* Copyright (c) 1993, 1995
	4	* The Regents of the University of California. All rights reserved.
	5	*
	6	* This code is derived from software contributed to Berkeley by
	7	* Jan-Simon Pendry.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	* 1. Redistributions of source code must retain the above copyright
	13	* notice, this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 3. All advertising materials mentioning features or use of this software
	18	* must display the following acknowledgement:
	19	* This product includes software developed by the University of
	20	* California, Berkeley and its contributors.
	21	* 4. Neither the name of the University nor the names of its contributors
	22	* may be used to endorse or promote products derived from this software
	23	* without specific prior written permission.
	24	*
	25	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	26	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	27	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	28	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	29	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	30	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	31	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	32	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	33	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	34	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	35	* SUCH DAMAGE.
	36	*
	37	* @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
	38	*
	39	* $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
	40	*/
	41
	42	/*
	43	* procfs vnode interface
	44	*/
	45
	46	#include <sys/param.h>
	47	#include <sys/systm.h>
	48	#include <sys/time.h>
	49	#include <sys/kernel.h>
	50	#include <sys/lock.h>
	51	#include <sys/fcntl.h>
	52	#include <sys/proc.h>
	53	#include <sys/priv.h>
	54	#include <sys/signalvar.h>
	55	#include <sys/vnode.h>
	56	#include <sys/uio.h>
	57	#include <sys/mount.h>
	58	#include <sys/namei.h>
	59	#include <sys/dirent.h>
	60	#include <sys/malloc.h>
	61	#include <sys/reg.h>
	62	#include <vm/vm_zone.h>
	63	#include <vfs/procfs/procfs.h>
	64	#include <sys/pioctl.h>
	65
	66	#include <sys/spinlock2.h>
	67
	68	#include <machine/limits.h>
	69
	70	static int procfs_access (struct vop_access_args *);
	71	static int procfs_badop (struct vop_generic_args *);
	72	static int procfs_bmap (struct vop_bmap_args *);
	73	static int procfs_close (struct vop_close_args *);
	74	static int procfs_getattr (struct vop_getattr_args *);
	75	static int procfs_inactive (struct vop_inactive_args *);
	76	static int procfs_ioctl (struct vop_ioctl_args *);
	77	static int procfs_lookup (struct vop_old_lookup_args *);
	78	static int procfs_open (struct vop_open_args *);
	79	static int procfs_print (struct vop_print_args *);
	80	static int procfs_readdir (struct vop_readdir_args *);
	81	static int procfs_readlink (struct vop_readlink_args *);
	82	static int procfs_reclaim (struct vop_reclaim_args *);
	83	static int procfs_setattr (struct vop_setattr_args *);
	84
	85	static int procfs_readdir_proc(struct vop_readdir_args *);
	86	static int procfs_readdir_root(struct vop_readdir_args *);
	87
	88	/*
	89	* procfs vnode operations.
	90	*/
	91	struct vop_ops procfs_vnode_vops = {
	92	.vop_default = vop_defaultop,
	93	.vop_access = procfs_access,
	94	.vop_advlock = (void *)procfs_badop,
	95	.vop_bmap = procfs_bmap,
	96	.vop_close = procfs_close,
	97	.vop_old_create = (void *)procfs_badop,
	98	.vop_getattr = procfs_getattr,
	99	.vop_inactive = procfs_inactive,
	100	.vop_old_link = (void *)procfs_badop,
	101	.vop_old_lookup = procfs_lookup,
	102	.vop_old_mkdir = (void *)procfs_badop,
	103	.vop_old_mknod = (void *)procfs_badop,
	104	.vop_open = procfs_open,
	105	.vop_pathconf = vop_stdpathconf,
	106	.vop_print = procfs_print,
	107	.vop_read = procfs_rw,
	108	.vop_readdir = procfs_readdir,
	109	.vop_readlink = procfs_readlink,
	110	.vop_reclaim = procfs_reclaim,
	111	.vop_old_remove = (void *)procfs_badop,
	112	.vop_old_rename = (void *)procfs_badop,
	113	.vop_old_rmdir = (void *)procfs_badop,
	114	.vop_setattr = procfs_setattr,
	115	.vop_old_symlink = (void *)procfs_badop,
	116	.vop_write = (void *)procfs_rw,
	117	.vop_ioctl = procfs_ioctl
	118	};
	119
	120
	121	/*
	122	* This is a list of the valid names in the
	123	* process-specific sub-directories. It is
	124	* used in procfs_lookup and procfs_readdir
	125	*/
	126	static struct proc_target {
	127	u_char pt_type;
	128	u_char pt_namlen;
	129	char *pt_name;
	130	pfstype pt_pfstype;
	131	int (pt_valid) (struct lwp p);
	132	} proc_targets[] = {
	133	#define N(s) sizeof(s)-1, s
	134	/* name type validp */
	135	{ DT_DIR, N("."), Pproc, NULL },
	136	{ DT_DIR, N(".."), Proot, NULL },
	137	{ DT_REG, N("mem"), Pmem, NULL },
	138	{ DT_REG, N("regs"), Pregs, procfs_validregs },
	139	{ DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
	140	{ DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
	141	{ DT_REG, N("ctl"), Pctl, NULL },
	142	{ DT_REG, N("status"), Pstatus, NULL },
	143	{ DT_REG, N("note"), Pnote, NULL },
	144	{ DT_REG, N("notepg"), Pnotepg, NULL },
	145	{ DT_REG, N("map"), Pmap, procfs_validmap },
	146	{ DT_REG, N("etype"), Ptype, procfs_validtype },
	147	{ DT_REG, N("cmdline"), Pcmdline, NULL },
	148	{ DT_REG, N("rlimit"), Prlimit, NULL },
	149	{ DT_LNK, N("file"), Pfile, NULL },
	150	#undef N
	151	};
	152	static const int nproc_targets = NELEM(proc_targets);
	153
	154	static pid_t atopid (const char *, u_int);
	155
	156	/*
	157	* set things up for doing i/o on
	158	* the pfsnode (vp). (vp) is locked
	159	* on entry, and should be left locked
	160	* on exit.
	161	*
	162	* for procfs we don't need to do anything
	163	* in particular for i/o. all that is done
	164	* is to support exclusive open on process
	165	* memory images.
	166	*
	167	* procfs_open(struct vnode a_vp, int a_mode, struct ucred a_cred,
	168	* struct file *a_fp)
	169	*/
	170	static int
	171	procfs_open(struct vop_open_args *ap)
	172	{
	173	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	174	struct proc p1, p2;
	175	int error;
	176
	177	p2 = pfs_pfind(pfs->pfs_pid);
	178	if (p2 == NULL)
	179	return (ENOENT);
	180	if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) {
	181	error = ENOENT;
	182	goto done;
	183	}
	184
	185	switch (pfs->pfs_type) {
	186	case Pmem:
	187	if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) \|\|
	188	((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
	189	error = EBUSY;
	190	goto done;
	191	}
	192
	193	p1 = curproc;
	194	KKASSERT(p1);
	195	/* Can't trace a process that's currently exec'ing. */
	196	if ((p2->p_flags & P_INEXEC) != 0) {
	197	error = EAGAIN;
	198	goto done;
	199	}
	200	if (!CHECKIO(p1, p2) \|\| p_trespass(ap->a_cred, p2->p_ucred)) {
	201	error = EPERM;
	202	goto done;
	203	}
	204
	205	if (ap->a_mode & FWRITE)
	206	pfs->pfs_flags = ap->a_mode & (FWRITE\|O_EXCL);
	207
	208	break;
	209
	210	default:
	211	break;
	212	}
	213	error = vop_stdopen(ap);
	214	done:
	215	PRELE(p2);
	216	return error;
	217	}
	218
	219	/*
	220	* close the pfsnode (vp) after doing i/o.
	221	* (vp) is not locked on entry or exit.
	222	*
	223	* nothing to do for procfs other than undo
	224	* any exclusive open flag (see _open above).
	225	*
	226	* procfs_close(struct vnode a_vp, int a_fflag, struct ucred a_cred)
	227	*/
	228	static int
	229	procfs_close(struct vop_close_args *ap)
	230	{
	231	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	232	struct proc *p;
	233
	234	switch (pfs->pfs_type) {
	235	case Pmem:
	236	if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
	237	pfs->pfs_flags &= ~(FWRITE\|O_EXCL);
	238	/*
	239	* v_opencount determines the last real close on the vnode.
	240	*
	241	* If this is the last close, then it checks to see if
	242	* the target process has PF_LINGER set in p_pfsflags,
	243	* if this is not the case, then the process' stop flags
	244	* are cleared, and the process is woken up. This is
	245	* to help prevent the case where a process has been
	246	* told to stop on an event, but then the requesting process
	247	* has gone away or forgotten about it.
	248	*/
	249	p = NULL;
	250	if ((ap->a_vp->v_opencount < 2)
	251	&& (p = pfind(pfs->pfs_pid))
	252	&& !(p->p_pfsflags & PF_LINGER)) {
	253	spin_lock(&p->p_spin);
	254	p->p_stops = 0;
	255	p->p_step = 0;
	256	spin_unlock(&p->p_spin);
	257	wakeup(&p->p_step);
	258	}
	259	if (p)
	260	PRELE(p);
	261	break;
	262	default:
	263	break;
	264	}
	265
	266	return (vop_stdclose(ap));
	267	}
	268
	269	/*
	270	* do an ioctl operation on a pfsnode (vp).
	271	* (vp) is not locked on entry or exit.
	272	*/
	273	static int
	274	procfs_ioctl(struct vop_ioctl_args *ap)
	275	{
	276	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	277	struct proc *procp;
	278	struct proc *p;
	279	int error;
	280	int signo;
	281	struct procfs_status *psp;
	282	unsigned char flags;
	283
	284	procp = pfind(pfs->pfs_pid);
	285	if (procp == NULL)
	286	return ENOTTY;
	287	p = curproc;
	288	if (p == NULL) {
	289	error = EINVAL;
	290	goto done;
	291	}
	292
	293	/* Can't trace a process that's currently exec'ing. */
	294	if ((procp->p_flags & P_INEXEC) != 0) {
	295	error = EAGAIN;
	296	goto done;
	297	}
	298	if (!CHECKIO(p, procp) \|\| p_trespass(ap->a_cred, procp->p_ucred)) {
	299	error = EPERM;
	300	goto done;
	301	}
	302
	303	switch (ap->a_command) {
	304	case PIOCBIS:
	305	procp->p_stops \|= (unsigned int)ap->a_data;
	306	break;
	307	case PIOCBIC:
	308	procp->p_stops &= ~(unsigned int)ap->a_data;
	309	break;
	310	case PIOCSFL:
	311	/*
	312	* NFLAGS is "non-suser_xxx flags" -- currently, only
	313	* PFS_ISUGID ("ignore set u/g id");
	314	*/
	315	#define NFLAGS (PF_ISUGID)
	316	flags = (unsigned char)(unsigned int)ap->a_data;
	317	if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)))
	318	goto done;
	319	procp->p_pfsflags = flags;
	320	break;
	321	case PIOCGFL:
	322	(unsigned int)ap->a_data = (unsigned int)procp->p_pfsflags;
	323	break;
	324	case PIOCSTATUS:
	325	/*
	326	* NOTE: syscall entry deals with stopevents and may run without
	327	* the MP lock.
	328	*/
	329	psp = (struct procfs_status *)ap->a_data;
	330	psp->flags = procp->p_pfsflags;
	331	psp->events = procp->p_stops;
	332	spin_lock(&procp->p_spin);
	333	if (procp->p_step) {
	334	psp->state = 0;
	335	psp->why = procp->p_stype;
	336	psp->val = procp->p_xstat;
	337	spin_unlock(&procp->p_spin);
	338	} else {
	339	psp->state = 1;
	340	spin_unlock(&procp->p_spin);
	341	psp->why = 0; /* Not defined values */
	342	psp->val = 0; /* Not defined values */
	343	}
	344	break;
	345	case PIOCWAIT:
	346	/*
	347	* NOTE: syscall entry deals with stopevents and may run without
	348	* the MP lock.
	349	*/
	350	psp = (struct procfs_status *)ap->a_data;
	351	spin_lock(&procp->p_spin);
	352	while (procp->p_step == 0) {
	353	tsleep_interlock(&procp->p_stype, PCATCH);
	354	spin_unlock(&procp->p_spin);
	355	error = tsleep(&procp->p_stype, PCATCH \| PINTERLOCKED, "piocwait", 0);
	356	if (error)
	357	goto done;
	358	spin_lock(&procp->p_spin);
	359	}
	360	spin_unlock(&procp->p_spin);
	361	psp->state = 1; /* It stopped */
	362	psp->flags = procp->p_pfsflags;
	363	psp->events = procp->p_stops;
	364	psp->why = procp->p_stype; /* why it stopped */
	365	psp->val = procp->p_xstat; /* any extra info */
	366	break;
	367	case PIOCCONT: /* Restart a proc */
	368	/*
	369	* NOTE: syscall entry deals with stopevents and may run without
	370	* the MP lock. However, the caller is presumably interlocked
	371	* by having waited.
	372	*/
	373	if (procp->p_step == 0) {
	374	error = EINVAL; /* Can only start a stopped process */
	375	goto done;
	376	}
	377	if ((signo = (int)ap->a_data) != 0) {
	378	if (signo >= NSIG \|\| signo <= 0) {
	379	error = EINVAL;
	380	goto done;
	381	}
	382	ksignal(procp, signo);
	383	}
	384	procp->p_step = 0;
	385	wakeup(&procp->p_step);
	386	break;
	387	default:
	388	error = ENOTTY;
	389	goto done;
	390	}
	391	error = 0;
	392	done:
	393	PRELE(procp);
	394	return 0;
	395	}
	396
	397	/*
	398	* do block mapping for pfsnode (vp).
	399	* since we don't use the buffer cache
	400	* for procfs this function should never
	401	* be called. in any case, it's not clear
	402	* what part of the kernel ever makes use
	403	* of this function. for sanity, this is the
	404	* usual no-op bmap, although returning
	405	* (EIO) would be a reasonable alternative.
	406	*
	407	* XXX mmap assumes buffer cache operation
	408	*
	409	* procfs_bmap(struct vnode *a_vp, off_t a_loffset,
	410	* off_t a_doffsetp, int a_runp, int *a_runb)
	411	*/
	412	static int
	413	procfs_bmap(struct vop_bmap_args *ap)
	414	{
	415	if (ap->a_doffsetp != NULL)
	416	*ap->a_doffsetp = ap->a_loffset;
	417	if (ap->a_runp != NULL)
	418	*ap->a_runp = 0;
	419	if (ap->a_runb != NULL)
	420	*ap->a_runb = 0;
	421	return (0);
	422	}
	423
	424	/*
	425	* procfs_inactive is called when the pfsnode
	426	* is vrele'd and the reference count goes
	427	* to zero. (vp) will be on the vnode free
	428	* list, so to get it back vget() must be
	429	* used.
	430	*
	431	* (vp) is locked on entry, but must be unlocked on exit.
	432	*
	433	* procfs_inactive(struct vnode *a_vp)
	434	*/
	435	static int
	436	procfs_inactive(struct vop_inactive_args *ap)
	437	{
	438	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	439
	440	if (pfs->pfs_pid & PFS_DEAD)
	441	vrecycle(ap->a_vp);
	442	return (0);
	443	}
	444
	445	/*
	446	* _reclaim is called when getnewvnode()
	447	* wants to make use of an entry on the vnode
	448	* free list. at this time the filesystem needs
	449	* to free any private data and remove the node
	450	* from any private lists.
	451	*
	452	* procfs_reclaim(struct vnode *a_vp)
	453	*/
	454	static int
	455	procfs_reclaim(struct vop_reclaim_args *ap)
	456	{
	457	return (procfs_freevp(ap->a_vp));
	458	}
	459
	460	/*
	461	* _print is used for debugging.
	462	* just print a readable description
	463	* of (vp).
	464	*
	465	* procfs_print(struct vnode *a_vp)
	466	*/
	467	static int
	468	procfs_print(struct vop_print_args *ap)
	469	{
	470	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	471
	472	kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
	473	pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
	474	return (0);
	475	}
	476
	477	/*
	478	* generic entry point for unsupported operations
	479	*/
	480	static int
	481	procfs_badop(struct vop_generic_args *ap)
	482	{
	483	return (EIO);
	484	}
	485
	486	/*
	487	* Invent attributes for pfsnode (vp) and store
	488	* them in (vap).
	489	* Directories lengths are returned as zero since
	490	* any real length would require the genuine size
	491	* to be computed, and nothing cares anyway.
	492	*
	493	* this is relatively minimal for procfs.
	494	*
	495	* procfs_getattr(struct vnode a_vp, struct vattr a_vap)
	496	*/
	497	static int
	498	procfs_getattr(struct vop_getattr_args *ap)
	499	{
	500	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	501	struct vattr *vap = ap->a_vap;
	502	struct proc *procp;
	503	int error;
	504
	505	/*
	506	* First make sure that the process and its credentials
	507	* still exist.
	508	*/
	509	switch (pfs->pfs_type) {
	510	case Proot:
	511	case Pcurproc:
	512	procp = NULL;
	513	break;
	514
	515	default:
	516	procp = pfs_pfind(pfs->pfs_pid);
	517	if (procp == NULL \|\| procp->p_ucred == NULL) {
	518	error = ENOENT;
	519	goto done;
	520	}
	521	}
	522
	523	error = 0;
	524
	525	/* start by zeroing out the attributes */
	526	VATTR_NULL(vap);
	527
	528	/* next do all the common fields */
	529	vap->va_type = ap->a_vp->v_type;
	530	vap->va_mode = pfs->pfs_mode;
	531	vap->va_fileid = pfs->pfs_fileno;
	532	vap->va_flags = 0;
	533	vap->va_blocksize = PAGE_SIZE;
	534	vap->va_bytes = vap->va_size = 0;
	535	vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	536
	537	/*
	538	* Make all times be current TOD.
	539	* It would be possible to get the process start
	540	* time from the p_stat structure, but there's
	541	* no "file creation" time stamp anyway, and the
	542	* p_stat structure is not addressible if u. gets
	543	* swapped out for that process.
	544	*/
	545	nanotime(&vap->va_ctime);
	546	vap->va_atime = vap->va_mtime = vap->va_ctime;
	547
	548	/*
	549	* If the process has exercised some setuid or setgid
	550	* privilege, then rip away read/write permission so
	551	* that only root can gain access.
	552	*/
	553	switch (pfs->pfs_type) {
	554	case Pctl:
	555	case Pregs:
	556	case Pfpregs:
	557	case Pdbregs:
	558	case Pmem:
	559	if (procp->p_flags & P_SUGID)
	560	vap->va_mode &= ~((VREAD\|VWRITE)\|
	561	((VREAD\|VWRITE)>>3)\|
	562	((VREAD\|VWRITE)>>6));
	563	break;
	564	default:
	565	break;
	566	}
	567
	568	/*
	569	* now do the object specific fields
	570	*
	571	* The size could be set from struct reg, but it's hardly
	572	* worth the trouble, and it puts some (potentially) machine
	573	* dependent data into this machine-independent code. If it
	574	* becomes important then this function should break out into
	575	* a per-file stat function in the corresponding .c file.
	576	*/
	577
	578	vap->va_nlink = 1;
	579	if (procp) {
	580	vap->va_uid = procp->p_ucred->cr_uid;
	581	vap->va_gid = procp->p_ucred->cr_gid;
	582	}
	583
	584	switch (pfs->pfs_type) {
	585	case Proot:
	586	/*
	587	* Set nlink to 1 to tell fts(3) we don't actually know.
	588	*/
	589	vap->va_nlink = 1;
	590	vap->va_uid = 0;
	591	vap->va_gid = 0;
	592	vap->va_size = vap->va_bytes = DEV_BSIZE;
	593	break;
	594
	595	case Pcurproc: {
	596	char buf[16]; /* should be enough */
	597	vap->va_uid = 0;
	598	vap->va_gid = 0;
	599	vap->va_size = vap->va_bytes =
	600	ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
	601	break;
	602	}
	603
	604	case Pproc:
	605	vap->va_nlink = nproc_targets;
	606	vap->va_size = vap->va_bytes = DEV_BSIZE;
	607	break;
	608
	609	case Pfile: {
	610	char fullpath, freepath;
	611	error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
	612	if (error == 0) {
	613	vap->va_size = strlen(fullpath);
	614	kfree(freepath, M_TEMP);
	615	} else {
	616	vap->va_size = sizeof("unknown") - 1;
	617	error = 0;
	618	}
	619	vap->va_bytes = vap->va_size;
	620	break;
	621	}
	622
	623	case Pmem:
	624	/*
	625	* If we denied owner access earlier, then we have to
	626	* change the owner to root - otherwise 'ps' and friends
	627	* will break even though they are setgid kmem. SIGH
	628	*/
	629	if (procp->p_flags & P_SUGID)
	630	vap->va_uid = 0;
	631	else
	632	vap->va_uid = procp->p_ucred->cr_uid;
	633	break;
	634
	635	case Pregs:
	636	vap->va_bytes = vap->va_size = sizeof(struct reg);
	637	break;
	638
	639	case Pfpregs:
	640	vap->va_bytes = vap->va_size = sizeof(struct fpreg);
	641	break;
	642
	643	case Pdbregs:
	644	vap->va_bytes = vap->va_size = sizeof(struct dbreg);
	645	break;
	646
	647	case Ptype:
	648	case Pmap:
	649	case Pctl:
	650	case Pstatus:
	651	case Pnote:
	652	case Pnotepg:
	653	case Pcmdline:
	654	case Prlimit:
	655	break;
	656
	657	default:
	658	panic("procfs_getattr");
	659	}
	660	done:
	661	if (procp)
	662	PRELE(procp);
	663	return (error);
	664	}
	665
	666	/*
	667	* procfs_setattr(struct vnode a_vp, struct vattr a_vap,
	668	* struct ucred *a_cred)
	669	*/
	670	static int
	671	procfs_setattr(struct vop_setattr_args *ap)
	672	{
	673	if (ap->a_vap->va_flags != VNOVAL)
	674	return (EOPNOTSUPP);
	675
	676	/*
	677	* just fake out attribute setting
	678	* it's not good to generate an error
	679	* return, otherwise things like creat()
	680	* will fail when they try to set the
	681	* file length to 0. worse, this means
	682	* that echo $note > /proc/$pid/note will fail.
	683	*/
	684
	685	return (0);
	686	}
	687
	688	/*
	689	* implement access checking.
	690	*
	691	* procfs_access(struct vnode a_vp, int a_mode, struct ucred a_cred)
	692	*/
	693	static int
	694	procfs_access(struct vop_access_args *ap)
	695	{
	696	struct vattr vattr;
	697	int error;
	698
	699	error = VOP_GETATTR(ap->a_vp, &vattr);
	700	if (!error)
	701	error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid,
	702	vattr.va_mode, 0);
	703	return (error);
	704	}
	705
	706	/*
	707	* lookup. this is incredibly complicated in the general case, however
	708	* for most pseudo-filesystems very little needs to be done.
	709	*
	710	* procfs_lookup(struct vnode a_dvp, struct vnode *a_vpp,
	711	* struct componentname *a_cnp)
	712	*/
	713	static int
	714	procfs_lookup(struct vop_old_lookup_args *ap)
	715	{
	716	struct componentname *cnp = ap->a_cnp;
	717	struct vnode **vpp = ap->a_vpp;
	718	struct vnode *dvp = ap->a_dvp;
	719	char *pname = cnp->cn_nameptr;
	720	/* struct proc curp = cnp->cn_proc; /
	721	struct proc_target *pt;
	722	pid_t pid;
	723	struct pfsnode *pfs;
	724	struct proc *p;
	725	struct lwp *lp;
	726	int i;
	727	int error;
	728
	729	*vpp = NULL;
	730
	731	if (cnp->cn_nameiop == NAMEI_DELETE \|\| cnp->cn_nameiop == NAMEI_RENAME)
	732	return (EROFS);
	733
	734	p = NULL;
	735	error = 0;
	736	if (cnp->cn_namelen == 1 && *pname == '.') {
	737	*vpp = dvp;
	738	vref(*vpp);
	739	goto out;
	740	}
	741
	742	pfs = VTOPFS(dvp);
	743	switch (pfs->pfs_type) {
	744	case Proot:
	745	if (cnp->cn_flags & CNP_ISDOTDOT)
	746	return (EIO);
	747
	748	if (CNEQ(cnp, "curproc", 7)) {
	749	error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
	750	goto out;
	751	}
	752
	753	pid = atopid(pname, cnp->cn_namelen);
	754	if (pid == NO_PID)
	755	break;
	756
	757	p = pfs_pfind(pid);
	758	if (p == NULL)
	759	break;
	760
	761	if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
	762	break;
	763
	764	if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
	765	ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
	766	break;
	767
	768	error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
	769	goto out;
	770
	771	case Pproc:
	772	if (cnp->cn_flags & CNP_ISDOTDOT) {
	773	error = procfs_root(dvp->v_mount, vpp);
	774	goto out;
	775	}
	776
	777	p = pfs_pfind(pfs->pfs_pid);
	778	if (p == NULL)
	779	break;
	780	/* XXX lwp */
	781	lp = FIRST_LWP_IN_PROC(p);
	782
	783	if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
	784	break;
	785
	786	if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
	787	ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
	788	break;
	789
	790	for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
	791	if (cnp->cn_namelen == pt->pt_namlen &&
	792	bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
	793	(pt->pt_valid == NULL \|\| (*pt->pt_valid)(lp)))
	794	goto found;
	795	}
	796	break;
	797	found:
	798	error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
	799	pt->pt_pfstype);
	800	goto out;
	801
	802	default:
	803	error = ENOTDIR;
	804	goto out;
	805	}
	806	if (cnp->cn_nameiop == NAMEI_LOOKUP)
	807	error = ENOENT;
	808	else
	809	error = EROFS;
	810	/*
	811	* If no error occured *vpp will hold a referenced locked vnode.
	812	* dvp was passed to us locked and *vpp must be returned locked.
	813	* If *vpp != dvp then we should unlock dvp if (1) this is not the
	814	* last component or (2) CNP_LOCKPARENT is not set.
	815	*/
	816	out:
	817	if (error == 0 && *vpp != dvp) {
	818	if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) {
	819	cnp->cn_flags \|= CNP_PDIRUNLOCK;
	820	vn_unlock(dvp);
	821	}
	822	}
	823	if (p)
	824	PRELE(p);
	825	return (error);
	826	}
	827
	828	/*
	829	* Does this process have a text file?
	830	*/
	831	int
	832	procfs_validfile(struct lwp *lp)
	833	{
	834	return (procfs_findtextvp(lp->lwp_proc) != NULLVP);
	835	}
	836
	837	/*
	838	* readdir() returns directory entries from pfsnode (vp).
	839	*
	840	* We generate just one directory entry at a time, as it would probably
	841	* not pay off to buffer several entries locally to save uiomove calls.
	842	*
	843	* procfs_readdir(struct vnode a_vp, struct uio a_uio, struct ucred *a_cred,
	844	* int a_eofflag, int a_ncookies, off_t **a_cookies)
	845	*/
	846	static int
	847	procfs_readdir(struct vop_readdir_args *ap)
	848	{
	849	struct pfsnode *pfs;
	850	int error;
	851
	852	if (ap->a_uio->uio_offset < 0 \|\| ap->a_uio->uio_offset > INT_MAX)
	853	return (EINVAL);
	854	if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE \| LK_RETRY)) != 0)
	855	return (error);
	856	pfs = VTOPFS(ap->a_vp);
	857
	858	switch (pfs->pfs_type) {
	859	case Pproc:
	860	/*
	861	* this is for the process-specific sub-directories.
	862	* all that is needed to is copy out all the entries
	863	* from the procent[] table (top of this file).
	864	*/
	865	error = procfs_readdir_proc(ap);
	866	break;
	867	case Proot:
	868	/*
	869	* this is for the root of the procfs filesystem
	870	* what is needed is a special entry for "curproc"
	871	* followed by an entry for each process on allproc
	872	*/
	873	error = procfs_readdir_root(ap);
	874	break;
	875	default:
	876	error = ENOTDIR;
	877	break;
	878	}
	879
	880	vn_unlock(ap->a_vp);
	881	return (error);
	882	}
	883
	884	static int
	885	procfs_readdir_proc(struct vop_readdir_args *ap)
	886	{
	887	struct pfsnode *pfs;
	888	int error, i, retval;
	889	struct proc *p;
	890	struct lwp *lp;
	891	struct proc_target *pt;
	892	struct uio *uio = ap->a_uio;
	893
	894	pfs = VTOPFS(ap->a_vp);
	895	p = pfs_pfind(pfs->pfs_pid);
	896	if (p == NULL)
	897	return(0);
	898	if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
	899	error = 0;
	900	goto done;
	901	}
	902	/* XXX lwp, not MPSAFE */
	903	lp = FIRST_LWP_IN_PROC(p);
	904
	905	error = 0;
	906	i = (int)uio->uio_offset;
	907	if (i < 0) {
	908	error = EINVAL;
	909	goto done;
	910	}
	911
	912	for (pt = &proc_targets[i];
	913	!error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
	914	if (pt->pt_valid && (*pt->pt_valid)(lp) == 0)
	915	continue;
	916
	917	retval = vop_write_dirent(&error, uio,
	918	PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type,
	919	pt->pt_namlen, pt->pt_name);
	920	if (retval)
	921	break;
	922	}
	923
	924	uio->uio_offset = (off_t)i;
	925	error = 0;
	926	done:
	927	PRELE(p);
	928	return error;
	929	}
	930
	931	struct procfs_readdir_root_info {
	932	int error;
	933	int i;
	934	int pcnt;
	935	struct uio *uio;
	936	struct ucred *cred;
	937	};
	938
	939	static int procfs_readdir_root_callback(struct proc p, void data);
	940
	941	static int
	942	procfs_readdir_root(struct vop_readdir_args *ap)
	943	{
	944	struct procfs_readdir_root_info info;
	945	struct uio *uio = ap->a_uio;
	946	int res;
	947
	948	info.error = 0;
	949	info.i = (int)uio->uio_offset;
	950
	951	if (info.i < 0)
	952	return (EINVAL);
	953
	954	info.pcnt = 0;
	955	info.uio = uio;
	956	info.cred = ap->a_cred;
	957	while (info.pcnt < 3) {
	958	res = procfs_readdir_root_callback(NULL, &info);
	959	if (res < 0)
	960	break;
	961	}
	962	if (res >= 0)
	963	allproc_scan(procfs_readdir_root_callback, &info);
	964	uio->uio_offset = (off_t)info.i;
	965
	966	return (info.error);
	967	}
	968
	969	static int
	970	procfs_readdir_root_callback(struct proc p, void data)
	971	{
	972	struct procfs_readdir_root_info *info = data;
	973	struct uio *uio;
	974	int retval;
	975	ino_t d_ino;
	976	const char *d_name;
	977	char d_name_pid[20];
	978	size_t d_namlen;
	979	uint8_t d_type;
	980
	981	uio = info->uio;
	982
	983	if (uio->uio_resid <= 0 \|\| info->error)
	984	return(-1);
	985
	986	switch (info->pcnt) {
	987	case 0: /* `.' */
	988	d_ino = PROCFS_FILENO(0, Proot);
	989	d_name = ".";
	990	d_namlen = 1;
	991	d_type = DT_DIR;
	992	break;
	993	case 1: /* `..' */
	994	d_ino = PROCFS_FILENO(0, Proot);
	995	d_name = "..";
	996	d_namlen = 2;
	997	d_type = DT_DIR;
	998	break;
	999
	1000	case 2:
	1001	d_ino = PROCFS_FILENO(0, Pcurproc);
	1002	d_namlen = 7;
	1003	d_name = "curproc";
	1004	d_type = DT_LNK;
	1005	break;
	1006
	1007
	1008	default:
	1009	if (!PRISON_CHECK(info->cred, p->p_ucred))
	1010	return(0);
	1011	if (ps_showallprocs == 0 &&
	1012	info->cred->cr_uid != 0 &&
	1013	info->cred->cr_uid != p->p_ucred->cr_uid) {
	1014	return(0);
	1015	}
	1016
	1017	/*
	1018	* Skip entries we have already returned (optimization)
	1019	*/
	1020	if (info->pcnt < info->i) {
	1021	++info->pcnt;
	1022	return(0);
	1023	}
	1024
	1025	d_ino = PROCFS_FILENO(p->p_pid, Pproc);
	1026	d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid),
	1027	"%ld", (long)p->p_pid);
	1028	d_name = d_name_pid;
	1029	d_type = DT_DIR;
	1030	break;
	1031	}
	1032
	1033	/*
	1034	* Skip entries we have already returned (optimization)
	1035	*/
	1036	if (info->pcnt < info->i) {
	1037	++info->pcnt;
	1038	return(0);
	1039	}
	1040
	1041	retval = vop_write_dirent(&info->error, uio,
	1042	d_ino, d_type, d_namlen, d_name);
	1043	if (retval)
	1044	return(-1);
	1045	++info->pcnt;
	1046	++info->i;
	1047	return(0);
	1048	}
	1049
	1050	/*
	1051	* readlink reads the link of `curproc' or `file'
	1052	*/
	1053	static int
	1054	procfs_readlink(struct vop_readlink_args *ap)
	1055	{
	1056	char buf[16]; /* should be enough */
	1057	struct proc *procp;
	1058	struct vnode *vp = ap->a_vp;
	1059	struct pfsnode *pfs = VTOPFS(vp);
	1060	char fullpath, freepath;
	1061	int error, len;
	1062
	1063	switch (pfs->pfs_type) {
	1064	case Pcurproc:
	1065	if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
	1066	return (EINVAL);
	1067
	1068	len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
	1069
	1070	return (uiomove(buf, len, ap->a_uio));
	1071	/*
	1072	* There _should_ be no way for an entire process to disappear
	1073	* from under us...
	1074	*/
	1075	case Pfile:
	1076	procp = pfs_pfind(pfs->pfs_pid);
	1077	if (procp == NULL \|\| procp->p_ucred == NULL) {
	1078	kprintf("procfs_readlink: pid %d disappeared\n",
	1079	pfs->pfs_pid);
	1080	if (procp)
	1081	PRELE(procp);
	1082	return (uiomove("unknown", sizeof("unknown") - 1,
	1083	ap->a_uio));
	1084	}
	1085	error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
	1086	if (error != 0) {
	1087	if (procp)
	1088	PRELE(procp);
	1089	return (uiomove("unknown", sizeof("unknown") - 1,
	1090	ap->a_uio));
	1091	}
	1092	error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
	1093	kfree(freepath, M_TEMP);
	1094	if (procp)
	1095	PRELE(procp);
	1096	return (error);
	1097	default:
	1098	return (EINVAL);
	1099	}
	1100	}
	1101
	1102	/*
	1103	* convert decimal ascii to pid_t
	1104	*/
	1105	static pid_t
	1106	atopid(const char *b, u_int len)
	1107	{
	1108	pid_t p = 0;
	1109
	1110	while (len--) {
	1111	char c = *b++;
	1112	if (c < '0' \|\| c > '9')
	1113	return (NO_PID);
	1114	p = 10 * p + (c - '0');
	1115	if (p > PID_MAX)
	1116	return (NO_PID);
	1117	}
	1118
	1119	return (p);
	1120	}
	1121