| 1 | /* |
| 2 | * Copyright (c) 1993, 1995 Jan-Simon Pendry |
| 3 | * Copyright (c) 1993, 1995 |
| 4 | * The Regents of the University of California. All rights reserved. |
| 5 | * |
| 6 | * This code is derived from software contributed to Berkeley by |
| 7 | * Jan-Simon Pendry. |
| 8 | * |
| 9 | * Redistribution and use in source and binary forms, with or without |
| 10 | * modification, are permitted provided that the following conditions |
| 11 | * are met: |
| 12 | * 1. Redistributions of source code must retain the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer. |
| 14 | * 2. Redistributions in binary form must reproduce the above copyright |
| 15 | * notice, this list of conditions and the following disclaimer in the |
| 16 | * documentation and/or other materials provided with the distribution. |
| 17 | * 3. All advertising materials mentioning features or use of this software |
| 18 | * must display the following acknowledgement: |
| 19 | * This product includes software developed by the University of |
| 20 | * California, Berkeley and its contributors. |
| 21 | * 4. Neither the name of the University nor the names of its contributors |
| 22 | * may be used to endorse or promote products derived from this software |
| 23 | * without specific prior written permission. |
| 24 | * |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 35 | * SUCH DAMAGE. |
| 36 | * |
| 37 | * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 |
| 38 | * |
| 39 | * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $ |
| 40 | * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.2 2003/06/17 04:28:42 dillon Exp $ |
| 41 | */ |
| 42 | |
| 43 | /* |
| 44 | * procfs vnode interface |
| 45 | */ |
| 46 | |
| 47 | #include <sys/param.h> |
| 48 | #include <sys/systm.h> |
| 49 | #include <sys/time.h> |
| 50 | #include <sys/kernel.h> |
| 51 | #include <sys/lock.h> |
| 52 | #include <sys/fcntl.h> |
| 53 | #include <sys/proc.h> |
| 54 | #include <sys/signalvar.h> |
| 55 | #include <sys/vnode.h> |
| 56 | #include <sys/uio.h> |
| 57 | #include <sys/mount.h> |
| 58 | #include <sys/namei.h> |
| 59 | #include <sys/dirent.h> |
| 60 | #include <sys/malloc.h> |
| 61 | #include <machine/reg.h> |
| 62 | #include <vm/vm_zone.h> |
| 63 | #include <miscfs/procfs/procfs.h> |
| 64 | #include <sys/pioctl.h> |
| 65 | |
/*
 * Forward declarations of the vnode operations implemented in this
 * file.  Each one is installed in procfs_vnodeop_entries[] near the
 * end of the file.
 */
static int procfs_access __P((struct vop_access_args *));
static int procfs_badop __P((void));
static int procfs_bmap __P((struct vop_bmap_args *));
static int procfs_close __P((struct vop_close_args *));
static int procfs_getattr __P((struct vop_getattr_args *));
static int procfs_inactive __P((struct vop_inactive_args *));
static int procfs_ioctl __P((struct vop_ioctl_args *));
static int procfs_lookup __P((struct vop_lookup_args *));
static int procfs_open __P((struct vop_open_args *));
static int procfs_print __P((struct vop_print_args *));
static int procfs_readdir __P((struct vop_readdir_args *));
static int procfs_readlink __P((struct vop_readlink_args *));
static int procfs_reclaim __P((struct vop_reclaim_args *));
static int procfs_setattr __P((struct vop_setattr_args *));
| 80 | |
/*
 * This is a list of the valid names in the
 * process-specific sub-directories.  It is
 * used in procfs_lookup and procfs_readdir.
 *
 * Each entry gives the directory-entry type, the name together with
 * its length (excluding the terminating NUL), the pfstype of the node
 * and an optional predicate.  When pt_valid is non-NULL the entry is
 * only looked up / listed for processes for which it returns non-zero.
 */
static struct proc_target {
	u_char pt_type;		/* DT_* value reported by readdir */
	u_char pt_namlen;	/* length of pt_name, without the NUL */
	char *pt_name;		/* entry name */
	pfstype pt_pfstype;	/* node type passed to procfs_allocvp() */
	int (*pt_valid) __P((struct proc *p));	/* NULL means always valid */
} proc_targets[] = {
/* N(s) supplies two initializers at once: pt_namlen and pt_name. */
#define N(s) sizeof(s)-1, s
	/* type, (namlen, name), pfstype, validp */
	{ DT_DIR, N("."), Pproc, NULL },
	{ DT_DIR, N(".."), Proot, NULL },
	{ DT_REG, N("mem"), Pmem, NULL },
	{ DT_REG, N("regs"), Pregs, procfs_validregs },
	{ DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
	{ DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
	{ DT_REG, N("ctl"), Pctl, NULL },
	{ DT_REG, N("status"), Pstatus, NULL },
	{ DT_REG, N("note"), Pnote, NULL },
	{ DT_REG, N("notepg"), Pnotepg, NULL },
	{ DT_REG, N("map"), Pmap, procfs_validmap },
	{ DT_REG, N("etype"), Ptype, procfs_validtype },
	{ DT_REG, N("cmdline"), Pcmdline, NULL },
	{ DT_REG, N("rlimit"), Prlimit, NULL },
	{ DT_LNK, N("file"), Pfile, NULL },
#undef N
};
/* Number of entries in proc_targets[]. */
static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);

/* Decimal string to pid conversion; defined near the end of this file. */
static pid_t atopid __P((const char *, u_int));
| 115 | |
| 116 | /* |
| 117 | * set things up for doing i/o on |
| 118 | * the pfsnode (vp). (vp) is locked |
| 119 | * on entry, and should be left locked |
| 120 | * on exit. |
| 121 | * |
| 122 | * for procfs we don't need to do anything |
| 123 | * in particular for i/o. all that is done |
| 124 | * is to support exclusive open on process |
| 125 | * memory images. |
| 126 | */ |
| 127 | static int |
| 128 | procfs_open(ap) |
| 129 | struct vop_open_args /* { |
| 130 | struct vnode *a_vp; |
| 131 | int a_mode; |
| 132 | struct ucred *a_cred; |
| 133 | struct proc *a_p; |
| 134 | } */ *ap; |
| 135 | { |
| 136 | struct pfsnode *pfs = VTOPFS(ap->a_vp); |
| 137 | struct proc *p1, *p2; |
| 138 | |
| 139 | p2 = PFIND(pfs->pfs_pid); |
| 140 | if (p2 == NULL) |
| 141 | return (ENOENT); |
| 142 | if (pfs->pfs_pid && !PRISON_CHECK(ap->a_p, p2)) |
| 143 | return (ENOENT); |
| 144 | |
| 145 | switch (pfs->pfs_type) { |
| 146 | case Pmem: |
| 147 | if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || |
| 148 | ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) |
| 149 | return (EBUSY); |
| 150 | |
| 151 | p1 = ap->a_p; |
| 152 | /* Can't trace a process that's currently exec'ing. */ |
| 153 | if ((p2->p_flag & P_INEXEC) != 0) |
| 154 | return EAGAIN; |
| 155 | if (!CHECKIO(p1, p2) || p_trespass(p1, p2)) |
| 156 | return (EPERM); |
| 157 | |
| 158 | if (ap->a_mode & FWRITE) |
| 159 | pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); |
| 160 | |
| 161 | return (0); |
| 162 | |
| 163 | default: |
| 164 | break; |
| 165 | } |
| 166 | |
| 167 | return (0); |
| 168 | } |
| 169 | |
| 170 | /* |
| 171 | * close the pfsnode (vp) after doing i/o. |
| 172 | * (vp) is not locked on entry or exit. |
| 173 | * |
| 174 | * nothing to do for procfs other than undo |
| 175 | * any exclusive open flag (see _open above). |
| 176 | */ |
| 177 | static int |
| 178 | procfs_close(ap) |
| 179 | struct vop_close_args /* { |
| 180 | struct vnode *a_vp; |
| 181 | int a_fflag; |
| 182 | struct ucred *a_cred; |
| 183 | struct proc *a_p; |
| 184 | } */ *ap; |
| 185 | { |
| 186 | struct pfsnode *pfs = VTOPFS(ap->a_vp); |
| 187 | struct proc *p; |
| 188 | |
| 189 | switch (pfs->pfs_type) { |
| 190 | case Pmem: |
| 191 | if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) |
| 192 | pfs->pfs_flags &= ~(FWRITE|O_EXCL); |
| 193 | /* |
| 194 | * This rather complicated-looking code is trying to |
| 195 | * determine if this was the last close on this particular |
| 196 | * vnode. While one would expect v_usecount to be 1 at |
| 197 | * that point, it seems that (according to John Dyson) |
| 198 | * the VM system will bump up the usecount. So: if the |
| 199 | * usecount is 2, and VOBJBUF is set, then this is really |
| 200 | * the last close. Otherwise, if the usecount is < 2 |
| 201 | * then it is definitely the last close. |
| 202 | * If this is the last close, then it checks to see if |
| 203 | * the target process has PF_LINGER set in p_pfsflags, |
| 204 | * if this is *not* the case, then the process' stop flags |
| 205 | * are cleared, and the process is woken up. This is |
| 206 | * to help prevent the case where a process has been |
| 207 | * told to stop on an event, but then the requesting process |
| 208 | * has gone away or forgotten about it. |
| 209 | */ |
| 210 | if ((ap->a_vp->v_usecount < 2) |
| 211 | && (p = pfind(pfs->pfs_pid)) |
| 212 | && !(p->p_pfsflags & PF_LINGER)) { |
| 213 | p->p_stops = 0; |
| 214 | p->p_step = 0; |
| 215 | wakeup(&p->p_step); |
| 216 | } |
| 217 | break; |
| 218 | default: |
| 219 | break; |
| 220 | } |
| 221 | |
| 222 | return (0); |
| 223 | } |
| 224 | |
| 225 | /* |
| 226 | * do an ioctl operation on a pfsnode (vp). |
| 227 | * (vp) is not locked on entry or exit. |
| 228 | */ |
| 229 | static int |
| 230 | procfs_ioctl(ap) |
| 231 | struct vop_ioctl_args *ap; |
| 232 | { |
| 233 | struct pfsnode *pfs = VTOPFS(ap->a_vp); |
| 234 | struct proc *procp, *p; |
| 235 | int error; |
| 236 | int signo; |
| 237 | struct procfs_status *psp; |
| 238 | unsigned char flags; |
| 239 | |
| 240 | p = ap->a_p; |
| 241 | procp = pfind(pfs->pfs_pid); |
| 242 | if (procp == NULL) { |
| 243 | return ENOTTY; |
| 244 | } |
| 245 | |
| 246 | /* Can't trace a process that's currently exec'ing. */ |
| 247 | if ((procp->p_flag & P_INEXEC) != 0) |
| 248 | return EAGAIN; |
| 249 | if (!CHECKIO(p, procp) || p_trespass(p, procp)) |
| 250 | return EPERM; |
| 251 | |
| 252 | switch (ap->a_command) { |
| 253 | case PIOCBIS: |
| 254 | procp->p_stops |= *(unsigned int*)ap->a_data; |
| 255 | break; |
| 256 | case PIOCBIC: |
| 257 | procp->p_stops &= ~*(unsigned int*)ap->a_data; |
| 258 | break; |
| 259 | case PIOCSFL: |
| 260 | /* |
| 261 | * NFLAGS is "non-suser_xxx flags" -- currently, only |
| 262 | * PFS_ISUGID ("ignore set u/g id"); |
| 263 | */ |
| 264 | #define NFLAGS (PF_ISUGID) |
| 265 | flags = (unsigned char)*(unsigned int*)ap->a_data; |
| 266 | if (flags & NFLAGS && (error = suser(p))) |
| 267 | return error; |
| 268 | procp->p_pfsflags = flags; |
| 269 | break; |
| 270 | case PIOCGFL: |
| 271 | *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; |
| 272 | break; |
| 273 | case PIOCSTATUS: |
| 274 | psp = (struct procfs_status *)ap->a_data; |
| 275 | psp->state = (procp->p_step == 0); |
| 276 | psp->flags = procp->p_pfsflags; |
| 277 | psp->events = procp->p_stops; |
| 278 | if (procp->p_step) { |
| 279 | psp->why = procp->p_stype; |
| 280 | psp->val = procp->p_xstat; |
| 281 | } else { |
| 282 | psp->why = psp->val = 0; /* Not defined values */ |
| 283 | } |
| 284 | break; |
| 285 | case PIOCWAIT: |
| 286 | psp = (struct procfs_status *)ap->a_data; |
| 287 | if (procp->p_step == 0) { |
| 288 | error = tsleep(&procp->p_stype, PWAIT | PCATCH, "piocwait", 0); |
| 289 | if (error) |
| 290 | return error; |
| 291 | } |
| 292 | psp->state = 1; /* It stopped */ |
| 293 | psp->flags = procp->p_pfsflags; |
| 294 | psp->events = procp->p_stops; |
| 295 | psp->why = procp->p_stype; /* why it stopped */ |
| 296 | psp->val = procp->p_xstat; /* any extra info */ |
| 297 | break; |
| 298 | case PIOCCONT: /* Restart a proc */ |
| 299 | if (procp->p_step == 0) |
| 300 | return EINVAL; /* Can only start a stopped process */ |
| 301 | if ((signo = *(int*)ap->a_data) != 0) { |
| 302 | if (signo >= NSIG || signo <= 0) |
| 303 | return EINVAL; |
| 304 | psignal(procp, signo); |
| 305 | } |
| 306 | procp->p_step = 0; |
| 307 | wakeup(&procp->p_step); |
| 308 | break; |
| 309 | default: |
| 310 | return (ENOTTY); |
| 311 | } |
| 312 | return 0; |
| 313 | } |
| 314 | |
| 315 | /* |
| 316 | * do block mapping for pfsnode (vp). |
| 317 | * since we don't use the buffer cache |
| 318 | * for procfs this function should never |
| 319 | * be called. in any case, it's not clear |
| 320 | * what part of the kernel ever makes use |
| 321 | * of this function. for sanity, this is the |
| 322 | * usual no-op bmap, although returning |
| 323 | * (EIO) would be a reasonable alternative. |
| 324 | */ |
| 325 | static int |
| 326 | procfs_bmap(ap) |
| 327 | struct vop_bmap_args /* { |
| 328 | struct vnode *a_vp; |
| 329 | daddr_t a_bn; |
| 330 | struct vnode **a_vpp; |
| 331 | daddr_t *a_bnp; |
| 332 | int *a_runp; |
| 333 | } */ *ap; |
| 334 | { |
| 335 | |
| 336 | if (ap->a_vpp != NULL) |
| 337 | *ap->a_vpp = ap->a_vp; |
| 338 | if (ap->a_bnp != NULL) |
| 339 | *ap->a_bnp = ap->a_bn; |
| 340 | if (ap->a_runp != NULL) |
| 341 | *ap->a_runp = 0; |
| 342 | return (0); |
| 343 | } |
| 344 | |
| 345 | /* |
| 346 | * procfs_inactive is called when the pfsnode |
| 347 | * is vrele'd and the reference count goes |
| 348 | * to zero. (vp) will be on the vnode free |
| 349 | * list, so to get it back vget() must be |
| 350 | * used. |
| 351 | * |
| 352 | * (vp) is locked on entry, but must be unlocked on exit. |
| 353 | */ |
| 354 | static int |
| 355 | procfs_inactive(ap) |
| 356 | struct vop_inactive_args /* { |
| 357 | struct vnode *a_vp; |
| 358 | } */ *ap; |
| 359 | { |
| 360 | struct vnode *vp = ap->a_vp; |
| 361 | |
| 362 | VOP_UNLOCK(vp, 0, ap->a_p); |
| 363 | |
| 364 | return (0); |
| 365 | } |
| 366 | |
| 367 | /* |
| 368 | * _reclaim is called when getnewvnode() |
| 369 | * wants to make use of an entry on the vnode |
| 370 | * free list. at this time the filesystem needs |
| 371 | * to free any private data and remove the node |
| 372 | * from any private lists. |
| 373 | */ |
| 374 | static int |
| 375 | procfs_reclaim(ap) |
| 376 | struct vop_reclaim_args /* { |
| 377 | struct vnode *a_vp; |
| 378 | } */ *ap; |
| 379 | { |
| 380 | |
| 381 | return (procfs_freevp(ap->a_vp)); |
| 382 | } |
| 383 | |
| 384 | /* |
| 385 | * _print is used for debugging. |
| 386 | * just print a readable description |
| 387 | * of (vp). |
| 388 | */ |
| 389 | static int |
| 390 | procfs_print(ap) |
| 391 | struct vop_print_args /* { |
| 392 | struct vnode *a_vp; |
| 393 | } */ *ap; |
| 394 | { |
| 395 | struct pfsnode *pfs = VTOPFS(ap->a_vp); |
| 396 | |
| 397 | printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", |
| 398 | pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); |
| 399 | return (0); |
| 400 | } |
| 401 | |
| 402 | /* |
| 403 | * generic entry point for unsupported operations |
| 404 | */ |
| 405 | static int |
| 406 | procfs_badop() |
| 407 | { |
| 408 | |
| 409 | return (EIO); |
| 410 | } |
| 411 | |
| 412 | /* |
| 413 | * Invent attributes for pfsnode (vp) and store |
| 414 | * them in (vap). |
| 415 | * Directories lengths are returned as zero since |
| 416 | * any real length would require the genuine size |
| 417 | * to be computed, and nothing cares anyway. |
| 418 | * |
| 419 | * this is relatively minimal for procfs. |
| 420 | */ |
| 421 | static int |
| 422 | procfs_getattr(ap) |
| 423 | struct vop_getattr_args /* { |
| 424 | struct vnode *a_vp; |
| 425 | struct vattr *a_vap; |
| 426 | struct ucred *a_cred; |
| 427 | struct proc *a_p; |
| 428 | } */ *ap; |
| 429 | { |
| 430 | struct pfsnode *pfs = VTOPFS(ap->a_vp); |
| 431 | struct vattr *vap = ap->a_vap; |
| 432 | struct proc *procp; |
| 433 | int error; |
| 434 | |
| 435 | /* |
| 436 | * First make sure that the process and its credentials |
| 437 | * still exist. |
| 438 | */ |
| 439 | switch (pfs->pfs_type) { |
| 440 | case Proot: |
| 441 | case Pcurproc: |
| 442 | procp = 0; |
| 443 | break; |
| 444 | |
| 445 | default: |
| 446 | procp = PFIND(pfs->pfs_pid); |
| 447 | if (procp == NULL || procp->p_cred == NULL || |
| 448 | procp->p_ucred == NULL) |
| 449 | return (ENOENT); |
| 450 | } |
| 451 | |
| 452 | error = 0; |
| 453 | |
| 454 | /* start by zeroing out the attributes */ |
| 455 | VATTR_NULL(vap); |
| 456 | |
| 457 | /* next do all the common fields */ |
| 458 | vap->va_type = ap->a_vp->v_type; |
| 459 | vap->va_mode = pfs->pfs_mode; |
| 460 | vap->va_fileid = pfs->pfs_fileno; |
| 461 | vap->va_flags = 0; |
| 462 | vap->va_blocksize = PAGE_SIZE; |
| 463 | vap->va_bytes = vap->va_size = 0; |
| 464 | vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; |
| 465 | |
| 466 | /* |
| 467 | * Make all times be current TOD. |
| 468 | * It would be possible to get the process start |
| 469 | * time from the p_stat structure, but there's |
| 470 | * no "file creation" time stamp anyway, and the |
| 471 | * p_stat structure is not addressible if u. gets |
| 472 | * swapped out for that process. |
| 473 | */ |
| 474 | nanotime(&vap->va_ctime); |
| 475 | vap->va_atime = vap->va_mtime = vap->va_ctime; |
| 476 | |
| 477 | /* |
| 478 | * If the process has exercised some setuid or setgid |
| 479 | * privilege, then rip away read/write permission so |
| 480 | * that only root can gain access. |
| 481 | */ |
| 482 | switch (pfs->pfs_type) { |
| 483 | case Pctl: |
| 484 | case Pregs: |
| 485 | case Pfpregs: |
| 486 | case Pdbregs: |
| 487 | case Pmem: |
| 488 | if (procp->p_flag & P_SUGID) |
| 489 | vap->va_mode &= ~((VREAD|VWRITE)| |
| 490 | ((VREAD|VWRITE)>>3)| |
| 491 | ((VREAD|VWRITE)>>6)); |
| 492 | break; |
| 493 | default: |
| 494 | break; |
| 495 | } |
| 496 | |
| 497 | /* |
| 498 | * now do the object specific fields |
| 499 | * |
| 500 | * The size could be set from struct reg, but it's hardly |
| 501 | * worth the trouble, and it puts some (potentially) machine |
| 502 | * dependent data into this machine-independent code. If it |
| 503 | * becomes important then this function should break out into |
| 504 | * a per-file stat function in the corresponding .c file. |
| 505 | */ |
| 506 | |
| 507 | vap->va_nlink = 1; |
| 508 | if (procp) { |
| 509 | vap->va_uid = procp->p_ucred->cr_uid; |
| 510 | vap->va_gid = procp->p_ucred->cr_gid; |
| 511 | } |
| 512 | |
| 513 | switch (pfs->pfs_type) { |
| 514 | case Proot: |
| 515 | /* |
| 516 | * Set nlink to 1 to tell fts(3) we don't actually know. |
| 517 | */ |
| 518 | vap->va_nlink = 1; |
| 519 | vap->va_uid = 0; |
| 520 | vap->va_gid = 0; |
| 521 | vap->va_size = vap->va_bytes = DEV_BSIZE; |
| 522 | break; |
| 523 | |
| 524 | case Pcurproc: { |
| 525 | char buf[16]; /* should be enough */ |
| 526 | vap->va_uid = 0; |
| 527 | vap->va_gid = 0; |
| 528 | vap->va_size = vap->va_bytes = |
| 529 | snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); |
| 530 | break; |
| 531 | } |
| 532 | |
| 533 | case Pproc: |
| 534 | vap->va_nlink = nproc_targets; |
| 535 | vap->va_size = vap->va_bytes = DEV_BSIZE; |
| 536 | break; |
| 537 | |
| 538 | case Pfile: { |
| 539 | char *fullpath, *freepath; |
| 540 | error = textvp_fullpath(procp, &fullpath, &freepath); |
| 541 | if (error == 0) { |
| 542 | vap->va_size = strlen(fullpath); |
| 543 | free(freepath, M_TEMP); |
| 544 | } else { |
| 545 | vap->va_size = sizeof("unknown") - 1; |
| 546 | error = 0; |
| 547 | } |
| 548 | vap->va_bytes = vap->va_size; |
| 549 | break; |
| 550 | } |
| 551 | |
| 552 | case Pmem: |
| 553 | /* |
| 554 | * If we denied owner access earlier, then we have to |
| 555 | * change the owner to root - otherwise 'ps' and friends |
| 556 | * will break even though they are setgid kmem. *SIGH* |
| 557 | */ |
| 558 | if (procp->p_flag & P_SUGID) |
| 559 | vap->va_uid = 0; |
| 560 | else |
| 561 | vap->va_uid = procp->p_ucred->cr_uid; |
| 562 | break; |
| 563 | |
| 564 | case Pregs: |
| 565 | vap->va_bytes = vap->va_size = sizeof(struct reg); |
| 566 | break; |
| 567 | |
| 568 | case Pfpregs: |
| 569 | vap->va_bytes = vap->va_size = sizeof(struct fpreg); |
| 570 | break; |
| 571 | |
| 572 | case Pdbregs: |
| 573 | vap->va_bytes = vap->va_size = sizeof(struct dbreg); |
| 574 | break; |
| 575 | |
| 576 | case Ptype: |
| 577 | case Pmap: |
| 578 | case Pctl: |
| 579 | case Pstatus: |
| 580 | case Pnote: |
| 581 | case Pnotepg: |
| 582 | case Pcmdline: |
| 583 | case Prlimit: |
| 584 | break; |
| 585 | |
| 586 | default: |
| 587 | panic("procfs_getattr"); |
| 588 | } |
| 589 | |
| 590 | return (error); |
| 591 | } |
| 592 | |
| 593 | static int |
| 594 | procfs_setattr(ap) |
| 595 | struct vop_setattr_args /* { |
| 596 | struct vnode *a_vp; |
| 597 | struct vattr *a_vap; |
| 598 | struct ucred *a_cred; |
| 599 | struct proc *a_p; |
| 600 | } */ *ap; |
| 601 | { |
| 602 | |
| 603 | if (ap->a_vap->va_flags != VNOVAL) |
| 604 | return (EOPNOTSUPP); |
| 605 | |
| 606 | /* |
| 607 | * just fake out attribute setting |
| 608 | * it's not good to generate an error |
| 609 | * return, otherwise things like creat() |
| 610 | * will fail when they try to set the |
| 611 | * file length to 0. worse, this means |
| 612 | * that echo $note > /proc/$pid/note will fail. |
| 613 | */ |
| 614 | |
| 615 | return (0); |
| 616 | } |
| 617 | |
| 618 | /* |
| 619 | * implement access checking. |
| 620 | * |
| 621 | * something very similar to this code is duplicated |
| 622 | * throughout the 4bsd kernel and should be moved |
| 623 | * into kern/vfs_subr.c sometime. |
| 624 | * |
| 625 | * actually, the check for super-user is slightly |
| 626 | * broken since it will allow read access to write-only |
| 627 | * objects. this doesn't cause any particular trouble |
| 628 | * but does mean that the i/o entry points need to check |
| 629 | * that the operation really does make sense. |
| 630 | */ |
| 631 | static int |
| 632 | procfs_access(ap) |
| 633 | struct vop_access_args /* { |
| 634 | struct vnode *a_vp; |
| 635 | int a_mode; |
| 636 | struct ucred *a_cred; |
| 637 | struct proc *a_p; |
| 638 | } */ *ap; |
| 639 | { |
| 640 | struct vattr *vap; |
| 641 | struct vattr vattr; |
| 642 | int error; |
| 643 | |
| 644 | /* |
| 645 | * If you're the super-user, |
| 646 | * you always get access. |
| 647 | */ |
| 648 | if (ap->a_cred->cr_uid == 0) |
| 649 | return (0); |
| 650 | |
| 651 | vap = &vattr; |
| 652 | error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p); |
| 653 | if (error) |
| 654 | return (error); |
| 655 | |
| 656 | /* |
| 657 | * Access check is based on only one of owner, group, public. |
| 658 | * If not owner, then check group. If not a member of the |
| 659 | * group, then check public access. |
| 660 | */ |
| 661 | if (ap->a_cred->cr_uid != vap->va_uid) { |
| 662 | gid_t *gp; |
| 663 | int i; |
| 664 | |
| 665 | ap->a_mode >>= 3; |
| 666 | gp = ap->a_cred->cr_groups; |
| 667 | for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) |
| 668 | if (vap->va_gid == *gp) |
| 669 | goto found; |
| 670 | ap->a_mode >>= 3; |
| 671 | found: |
| 672 | ; |
| 673 | } |
| 674 | |
| 675 | if ((vap->va_mode & ap->a_mode) == ap->a_mode) |
| 676 | return (0); |
| 677 | |
| 678 | return (EACCES); |
| 679 | } |
| 680 | |
| 681 | /* |
| 682 | * lookup. this is incredibly complicated in the |
| 683 | * general case, however for most pseudo-filesystems |
| 684 | * very little needs to be done. |
| 685 | * |
| 686 | * unless you want to get a migraine, just make sure your |
| 687 | * filesystem doesn't do any locking of its own. otherwise |
| 688 | * read and inwardly digest ufs_lookup(). |
| 689 | */ |
| 690 | static int |
| 691 | procfs_lookup(ap) |
| 692 | struct vop_lookup_args /* { |
| 693 | struct vnode * a_dvp; |
| 694 | struct vnode ** a_vpp; |
| 695 | struct componentname * a_cnp; |
| 696 | } */ *ap; |
| 697 | { |
| 698 | struct componentname *cnp = ap->a_cnp; |
| 699 | struct vnode **vpp = ap->a_vpp; |
| 700 | struct vnode *dvp = ap->a_dvp; |
| 701 | char *pname = cnp->cn_nameptr; |
| 702 | /* struct proc *curp = cnp->cn_proc; */ |
| 703 | struct proc_target *pt; |
| 704 | pid_t pid; |
| 705 | struct pfsnode *pfs; |
| 706 | struct proc *p; |
| 707 | int i; |
| 708 | |
| 709 | *vpp = NULL; |
| 710 | |
| 711 | if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) |
| 712 | return (EROFS); |
| 713 | |
| 714 | if (cnp->cn_namelen == 1 && *pname == '.') { |
| 715 | *vpp = dvp; |
| 716 | VREF(dvp); |
| 717 | /* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */ |
| 718 | return (0); |
| 719 | } |
| 720 | |
| 721 | pfs = VTOPFS(dvp); |
| 722 | switch (pfs->pfs_type) { |
| 723 | case Proot: |
| 724 | if (cnp->cn_flags & ISDOTDOT) |
| 725 | return (EIO); |
| 726 | |
| 727 | if (CNEQ(cnp, "curproc", 7)) |
| 728 | return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc)); |
| 729 | |
| 730 | pid = atopid(pname, cnp->cn_namelen); |
| 731 | if (pid == NO_PID) |
| 732 | break; |
| 733 | |
| 734 | p = PFIND(pid); |
| 735 | if (p == NULL) |
| 736 | break; |
| 737 | |
| 738 | return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc)); |
| 739 | |
| 740 | case Pproc: |
| 741 | if (cnp->cn_flags & ISDOTDOT) |
| 742 | return (procfs_root(dvp->v_mount, vpp)); |
| 743 | |
| 744 | p = PFIND(pfs->pfs_pid); |
| 745 | if (p == NULL) |
| 746 | break; |
| 747 | |
| 748 | for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { |
| 749 | if (cnp->cn_namelen == pt->pt_namlen && |
| 750 | bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && |
| 751 | (pt->pt_valid == NULL || (*pt->pt_valid)(p))) |
| 752 | goto found; |
| 753 | } |
| 754 | break; |
| 755 | found: |
| 756 | return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, |
| 757 | pt->pt_pfstype)); |
| 758 | |
| 759 | default: |
| 760 | return (ENOTDIR); |
| 761 | } |
| 762 | |
| 763 | return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); |
| 764 | } |
| 765 | |
| 766 | /* |
| 767 | * Does this process have a text file? |
| 768 | */ |
| 769 | int |
| 770 | procfs_validfile(p) |
| 771 | struct proc *p; |
| 772 | { |
| 773 | |
| 774 | return (procfs_findtextvp(p) != NULLVP); |
| 775 | } |
| 776 | |
| 777 | /* |
| 778 | * readdir() returns directory entries from pfsnode (vp). |
| 779 | * |
| 780 | * We generate just one directory entry at a time, as it would probably |
| 781 | * not pay off to buffer several entries locally to save uiomove calls. |
| 782 | */ |
| 783 | static int |
| 784 | procfs_readdir(ap) |
| 785 | struct vop_readdir_args /* { |
| 786 | struct vnode *a_vp; |
| 787 | struct uio *a_uio; |
| 788 | struct ucred *a_cred; |
| 789 | int *a_eofflag; |
| 790 | int *a_ncookies; |
| 791 | u_long **a_cookies; |
| 792 | } */ *ap; |
| 793 | { |
| 794 | struct uio *uio = ap->a_uio; |
| 795 | struct dirent d; |
| 796 | struct dirent *dp = &d; |
| 797 | struct pfsnode *pfs; |
| 798 | int count, error, i, off; |
| 799 | static u_int delen; |
| 800 | |
| 801 | if (!delen) { |
| 802 | |
| 803 | d.d_namlen = PROCFS_NAMELEN; |
| 804 | delen = GENERIC_DIRSIZ(&d); |
| 805 | } |
| 806 | |
| 807 | pfs = VTOPFS(ap->a_vp); |
| 808 | |
| 809 | off = (int)uio->uio_offset; |
| 810 | if (off != uio->uio_offset || off < 0 || |
| 811 | off % delen != 0 || uio->uio_resid < delen) |
| 812 | return (EINVAL); |
| 813 | |
| 814 | error = 0; |
| 815 | count = 0; |
| 816 | i = off / delen; |
| 817 | |
| 818 | switch (pfs->pfs_type) { |
| 819 | /* |
| 820 | * this is for the process-specific sub-directories. |
| 821 | * all that is needed to is copy out all the entries |
| 822 | * from the procent[] table (top of this file). |
| 823 | */ |
| 824 | case Pproc: { |
| 825 | struct proc *p; |
| 826 | struct proc_target *pt; |
| 827 | |
| 828 | p = PFIND(pfs->pfs_pid); |
| 829 | if (p == NULL) |
| 830 | break; |
| 831 | if (!PRISON_CHECK(curproc, p)) |
| 832 | break; |
| 833 | |
| 834 | for (pt = &proc_targets[i]; |
| 835 | uio->uio_resid >= delen && i < nproc_targets; pt++, i++) { |
| 836 | if (pt->pt_valid && (*pt->pt_valid)(p) == 0) |
| 837 | continue; |
| 838 | |
| 839 | dp->d_reclen = delen; |
| 840 | dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); |
| 841 | dp->d_namlen = pt->pt_namlen; |
| 842 | bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1); |
| 843 | dp->d_type = pt->pt_type; |
| 844 | |
| 845 | if ((error = uiomove((caddr_t)dp, delen, uio)) != 0) |
| 846 | break; |
| 847 | } |
| 848 | |
| 849 | break; |
| 850 | } |
| 851 | |
| 852 | /* |
| 853 | * this is for the root of the procfs filesystem |
| 854 | * what is needed is a special entry for "curproc" |
| 855 | * followed by an entry for each process on allproc |
| 856 | #ifdef PROCFS_ZOMBIE |
| 857 | * and zombproc. |
| 858 | #endif |
| 859 | */ |
| 860 | |
| 861 | case Proot: { |
| 862 | #ifdef PROCFS_ZOMBIE |
| 863 | int doingzomb = 0; |
| 864 | #endif |
| 865 | int pcnt = 0; |
| 866 | volatile struct proc *p = allproc.lh_first; |
| 867 | |
| 868 | for (; p && uio->uio_resid >= delen; i++, pcnt++) { |
| 869 | bzero((char *) dp, delen); |
| 870 | dp->d_reclen = delen; |
| 871 | |
| 872 | switch (i) { |
| 873 | case 0: /* `.' */ |
| 874 | case 1: /* `..' */ |
| 875 | dp->d_fileno = PROCFS_FILENO(0, Proot); |
| 876 | dp->d_namlen = i + 1; |
| 877 | bcopy("..", dp->d_name, dp->d_namlen); |
| 878 | dp->d_name[i + 1] = '\0'; |
| 879 | dp->d_type = DT_DIR; |
| 880 | break; |
| 881 | |
| 882 | case 2: |
| 883 | dp->d_fileno = PROCFS_FILENO(0, Pcurproc); |
| 884 | dp->d_namlen = 7; |
| 885 | bcopy("curproc", dp->d_name, 8); |
| 886 | dp->d_type = DT_LNK; |
| 887 | break; |
| 888 | |
| 889 | default: |
| 890 | while (pcnt < i) { |
| 891 | p = p->p_list.le_next; |
| 892 | if (!p) |
| 893 | goto done; |
| 894 | if (!PRISON_CHECK(curproc, p)) |
| 895 | continue; |
| 896 | pcnt++; |
| 897 | } |
| 898 | while (!PRISON_CHECK(curproc, p)) { |
| 899 | p = p->p_list.le_next; |
| 900 | if (!p) |
| 901 | goto done; |
| 902 | } |
| 903 | dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc); |
| 904 | dp->d_namlen = sprintf(dp->d_name, "%ld", |
| 905 | (long)p->p_pid); |
| 906 | dp->d_type = DT_DIR; |
| 907 | p = p->p_list.le_next; |
| 908 | break; |
| 909 | } |
| 910 | |
| 911 | if ((error = uiomove((caddr_t)dp, delen, uio)) != 0) |
| 912 | break; |
| 913 | } |
| 914 | done: |
| 915 | |
| 916 | #ifdef PROCFS_ZOMBIE |
| 917 | if (p == NULL && doingzomb == 0) { |
| 918 | doingzomb = 1; |
| 919 | p = zombproc.lh_first; |
| 920 | goto again; |
| 921 | } |
| 922 | #endif |
| 923 | |
| 924 | break; |
| 925 | |
| 926 | } |
| 927 | |
| 928 | default: |
| 929 | error = ENOTDIR; |
| 930 | break; |
| 931 | } |
| 932 | |
| 933 | uio->uio_offset = i * delen; |
| 934 | |
| 935 | return (error); |
| 936 | } |
| 937 | |
| 938 | /* |
| 939 | * readlink reads the link of `curproc' or `file' |
| 940 | */ |
| 941 | static int |
| 942 | procfs_readlink(ap) |
| 943 | struct vop_readlink_args *ap; |
| 944 | { |
| 945 | char buf[16]; /* should be enough */ |
| 946 | struct proc *procp; |
| 947 | struct vnode *vp = ap->a_vp; |
| 948 | struct pfsnode *pfs = VTOPFS(vp); |
| 949 | char *fullpath, *freepath; |
| 950 | int error, len; |
| 951 | |
| 952 | switch (pfs->pfs_type) { |
| 953 | case Pcurproc: |
| 954 | if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) |
| 955 | return (EINVAL); |
| 956 | |
| 957 | len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); |
| 958 | |
| 959 | return (uiomove(buf, len, ap->a_uio)); |
| 960 | /* |
| 961 | * There _should_ be no way for an entire process to disappear |
| 962 | * from under us... |
| 963 | */ |
| 964 | case Pfile: |
| 965 | procp = PFIND(pfs->pfs_pid); |
| 966 | if (procp == NULL || procp->p_cred == NULL || |
| 967 | procp->p_ucred == NULL) { |
| 968 | printf("procfs_readlink: pid %d disappeared\n", |
| 969 | pfs->pfs_pid); |
| 970 | return (uiomove("unknown", sizeof("unknown") - 1, |
| 971 | ap->a_uio)); |
| 972 | } |
| 973 | error = textvp_fullpath(procp, &fullpath, &freepath); |
| 974 | if (error != 0) |
| 975 | return (uiomove("unknown", sizeof("unknown") - 1, |
| 976 | ap->a_uio)); |
| 977 | error = uiomove(fullpath, strlen(fullpath), ap->a_uio); |
| 978 | free(freepath, M_TEMP); |
| 979 | return (error); |
| 980 | default: |
| 981 | return (EINVAL); |
| 982 | } |
| 983 | } |
| 984 | |
| 985 | /* |
| 986 | * convert decimal ascii to pid_t |
| 987 | */ |
| 988 | static pid_t |
| 989 | atopid(b, len) |
| 990 | const char *b; |
| 991 | u_int len; |
| 992 | { |
| 993 | pid_t p = 0; |
| 994 | |
| 995 | while (len--) { |
| 996 | char c = *b++; |
| 997 | if (c < '0' || c > '9') |
| 998 | return (NO_PID); |
| 999 | p = 10 * p + (c - '0'); |
| 1000 | if (p > PID_MAX) |
| 1001 | return (NO_PID); |
| 1002 | } |
| 1003 | |
| 1004 | return (p); |
| 1005 | } |
| 1006 | |
/*
 * procfs vnode operations.
 *
 * The table pairs each vnode operation descriptor with its procfs
 * handler and is terminated by a { NULL, NULL } entry.  The
 * vop_default_desc entry names vop_defaultop, presumably as the
 * handler for any operation not listed here.  Operations that would
 * create, link, remove or rename entries, and advisory locking, are
 * routed to procfs_badop (EIO).  Reads and writes both go to
 * procfs_rw, which is defined outside this file.
 */
vop_t **procfs_vnodeop_p;
static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
	{ &vop_default_desc, (vop_t *) vop_defaultop },
	{ &vop_access_desc, (vop_t *) procfs_access },
	{ &vop_advlock_desc, (vop_t *) procfs_badop },
	{ &vop_bmap_desc, (vop_t *) procfs_bmap },
	{ &vop_close_desc, (vop_t *) procfs_close },
	{ &vop_create_desc, (vop_t *) procfs_badop },
	{ &vop_getattr_desc, (vop_t *) procfs_getattr },
	{ &vop_inactive_desc, (vop_t *) procfs_inactive },
	{ &vop_link_desc, (vop_t *) procfs_badop },
	{ &vop_lookup_desc, (vop_t *) procfs_lookup },
	{ &vop_mkdir_desc, (vop_t *) procfs_badop },
	{ &vop_mknod_desc, (vop_t *) procfs_badop },
	{ &vop_open_desc, (vop_t *) procfs_open },
	{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
	{ &vop_print_desc, (vop_t *) procfs_print },
	{ &vop_read_desc, (vop_t *) procfs_rw },
	{ &vop_readdir_desc, (vop_t *) procfs_readdir },
	{ &vop_readlink_desc, (vop_t *) procfs_readlink },
	{ &vop_reclaim_desc, (vop_t *) procfs_reclaim },
	{ &vop_remove_desc, (vop_t *) procfs_badop },
	{ &vop_rename_desc, (vop_t *) procfs_badop },
	{ &vop_rmdir_desc, (vop_t *) procfs_badop },
	{ &vop_setattr_desc, (vop_t *) procfs_setattr },
	{ &vop_symlink_desc, (vop_t *) procfs_badop },
	{ &vop_write_desc, (vop_t *) procfs_rw },
	{ &vop_ioctl_desc, (vop_t *) procfs_ioctl },
	{ NULL, NULL }
};
/* Ties the operations vector pointer to the entry table above. */
static struct vnodeopv_desc procfs_vnodeop_opv_desc =
	{ &procfs_vnodeop_p, procfs_vnodeop_entries };

VNODEOP_SET(procfs_vnodeop_opv_desc);