From 39f915780c75e3ece488310d95e27fee857c71a5 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Mon, 13 Oct 2003 18:01:28 +0000
Subject: [PATCH] Augment falloc() to support thread-only file pointers (with no
 integer file descriptor or process). Add new generic 'easy to use' fp_*()
 kernel functions which operate on file pointers. This will greatly ease
 in-kernel functions which must open, perform I/O, and close files.

Adopted from: other kernel sources and Kip Macy's checkpoint code.
---
 sys/conf/files          |   3 +-
 sys/kern/kern_descrip.c |  39 +++-
 sys/kern/kern_fp.c      | 436 ++++++++++++++++++++++++++++++++++++++++
 sys/sys/file.h          |  10 +-
 4 files changed, 472 insertions(+), 16 deletions(-)
 create mode 100644 sys/kern/kern_fp.c

diff --git a/sys/conf/files b/sys/conf/files
index 653313af98..448f4c41cc 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,5 +1,5 @@
 # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $
-# $DragonFly: src/sys/conf/files,v 1.16 2003/09/24 04:39:46 drhodus Exp $
+# $DragonFly: src/sys/conf/files,v 1.17 2003/10/13 18:01:23 dillon Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -748,6 +748,7 @@ kern/kern_conf.c	standard
 kern/kern_debug.c	standard
 kern/kern_device.c	standard
 kern/kern_descrip.c	standard
+kern/kern_fp.c		standard
 kern/kern_environment.c	standard
 kern/kern_event.c	standard
 kern/kern_exec.c	standard
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index ebf90b4aee..490c516329 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -37,7 +37,7 @@
  *
  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
  * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.17 2003/06/06 20:21:32 tegge Exp $
- * $DragonFly: src/sys/kern/kern_descrip.c,v 1.13 2003/09/23 05:03:51 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_descrip.c,v 1.14 2003/10/13 18:01:25 dillon Exp $
  */
 
 #include "opt_compat.h"
@@ -853,14 +853,15 @@ fdavail(p, n)
 }
 
 /*
- * Create a new open file structure and allocate
- * a file decriptor for the process that refers to it.
+ * falloc:
+ *	Create a new open file structure and allocate a file descriptor
+ *	for the process that refers to it.  If p is NULL, no descriptor
+ *	is allocated and the file pointer is returned unassociated with
+ *	any process.  resultfd is only used if p is not NULL and may
+ *	separately be NULL indicating that you don't need the returned fd.
  */
 int
-falloc(p, resultfp, resultfd)
-	struct proc *p;
-	struct file **resultfp;
-	int *resultfd;
+falloc(struct proc *p, struct file **resultfp, int *resultfd)
 {
 	struct file *fp, *fq;
 	int error, i;
@@ -884,7 +885,8 @@ falloc(p, resultfp, resultfd)
 	 * allocating the slot, else a race might have shrunk it if we had
 	 * allocated it before the malloc.
 	 */
-	if ((error = fdalloc(p, 0, &i))) {
+	i = -1;
+	if (p && (error = fdalloc(p, 0, &i))) {
 		nfiles--;
 		FREE(fp, M_FILE);
 		return (error);
@@ -893,12 +895,21 @@ falloc(p, resultfp, resultfd)
-	fp->f_cred = crhold(p->p_ucred);
 	fp->f_ops = &badfileops;
 	fp->f_seqcount = 1;
-	if ((fq = p->p_fd->fd_ofiles[0])) {
-		LIST_INSERT_AFTER(fq, fp, f_list);
-	} else {
-		LIST_INSERT_HEAD(&filehead, fp, f_list);
+	if (p) {
+		fp->f_cred = crhold(p->p_ucred);
+		if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
+			LIST_INSERT_AFTER(fq, fp, f_list);
+		} else {
+			LIST_INSERT_HEAD(&filehead, fp, f_list);
+		}
+		p->p_fd->fd_ofiles[i] = fp;
+	} else {
+		/*
+		 * No owning process: p must not be dereferenced.  Hold
+		 * proc0's credentials instead (NOTE(review): confirm this
+		 * is the desired identity) and still link the file into
+		 * filehead so it can be unlinked when it is released.
+		 */
+		fp->f_cred = crhold(proc0.p_ucred);
+		LIST_INSERT_HEAD(&filehead, fp, f_list);
 	}
-	p->p_fd->fd_ofiles[i] = fp;
 	if (resultfp)
 		*resultfp = fp;
 	if (resultfd)
diff --git a/sys/kern/kern_fp.c b/sys/kern/kern_fp.c
new file mode 100644
index 0000000000..00341b4afc
--- /dev/null
+++ b/sys/kern/kern_fp.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2003 Matthew Dillon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/kern/kern_fp.c,v 1.1 2003/10/13 18:01:25 dillon Exp $
+ */
+
+/*
+ * Direct file pointer API functions for in-kernel operations on files.  These
+ * functions provide an open/read/write/close like interface within the kernel
+ * for operating on files that are not necessarily associated with processes
+ * and which do not (typically) have descriptors.
+ *
+ * FUTURE: file handle conversion routines to support checkpointing,
+ * and additional file operations (ioctl, fcntl).
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+typedef struct file *file_t;
+
+/*
+ * fp_open:
+ *
+ *	Open a file as specified.  Use O_* flags for flags; mode is handed
+ *	to vn_open() as the creation mode.
+ *
+ *	On success *fpp receives the new file pointer and 0 is returned.
+ *	On failure an errno is returned, the half-constructed file pointer
+ *	is dropped, and *fpp is set to NULL so the caller cannot pick up
+ *	a stale pointer.
+ */
+int
+fp_open(const char *path, int flags, int mode, file_t *fpp)
+{
+    struct nameidata nd;
+    struct thread *td;
+    struct file *fp;
+    int error;
+
+    if ((error = falloc(NULL, fpp, NULL)) != 0)
+	return (error);
+    fp = *fpp;
+    td = curthread;
+    NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_SYSSPACE, path, td);
+    flags = FFLAGS(flags);
+    if ((error = vn_open(&nd, flags, mode)) == 0) {
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	fp->f_data = (caddr_t)nd.ni_vp;
+	fp->f_flag = flags;
+	fp->f_ops = &vnops;
+	fp->f_type = DTYPE_VNODE;
+	VOP_UNLOCK(nd.ni_vp, 0, td);
+    } else {
+	fdrop(fp, td);
+	*fpp = NULL;
+    }
+    return(error);
+}
+
+/*
+ * fp_read:
+ *
+ *	Read up to nbytes from fp at the given offset into the kernel
+ *	buffer buf.  The number of bytes actually transferred is stored
+ *	in *res when res is not NULL.  Requests larger than INT_MAX are
+ *	rejected with EINVAL.  ERESTART, EINTR and EWOULDBLOCK are turned
+ *	into success when they arrive after some data has already been
+ *	transferred, so the caller sees a short read instead.
+ */
+int
+fp_read(file_t fp, void *buf, size_t nbytes, off_t offset, ssize_t *res)
+{
+    struct uio auio;
+    struct iovec aiov;
+    size_t count;
+    int error;
+
+    if (res)
+	*res = 0;
+    if (nbytes > INT_MAX)
+	return (EINVAL);
+    bzero(&auio, sizeof(auio));
+    aiov.iov_base = (caddr_t)buf;
+    aiov.iov_len = nbytes;
+    auio.uio_iov = &aiov;
+    auio.uio_iovcnt = 1;
+    auio.uio_offset = offset;
+    auio.uio_resid = nbytes;
+    auio.uio_rw = UIO_READ;
+    auio.uio_segflg = UIO_SYSSPACE;
+    auio.uio_td = curthread;
+
+    count = nbytes;
+    error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, auio.uio_td);
+    if (error) {
+	if (auio.uio_resid != nbytes && (error == ERESTART || error == EINTR ||
+	    error == EWOULDBLOCK)
+	) {
+	    error = 0;
+	}
+    }
+    count -= auio.uio_resid;
+    if (res)
+	*res = count;
+    return(error);
+}
+
+/*
+ * fp_write:
+ *
+ *	Write up to nbytes from the kernel buffer buf to fp at the given
+ *	offset.  The number of bytes actually transferred is stored in
+ *	*res when res is not NULL.  Requests larger than INT_MAX are
+ *	rejected with EINVAL.  ERESTART, EINTR and EWOULDBLOCK are turned
+ *	into success when they arrive after some data has already been
+ *	transferred, so the caller sees a short write instead.
+ */
+int
+fp_write(file_t fp, void *buf, size_t nbytes, off_t offset, ssize_t *res)
+{
+    struct uio auio;
+    struct iovec aiov;
+    size_t count;
+    int error;
+
+    if (res)
+	*res = 0;
+    if (nbytes > INT_MAX)
+	return (EINVAL);
+    bzero(&auio, sizeof(auio));
+    aiov.iov_base = (caddr_t)buf;
+    aiov.iov_len = nbytes;
+    auio.uio_iov = &aiov;
+    auio.uio_iovcnt = 1;
+    auio.uio_offset = offset;
+    auio.uio_resid = nbytes;
+    auio.uio_rw = UIO_WRITE;
+    auio.uio_segflg = UIO_SYSSPACE;
+    auio.uio_td = curthread;
+
+    count = nbytes;
+    error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, auio.uio_td);
+    if (error) {
+	if (auio.uio_resid != nbytes && (error == ERESTART || error == EINTR ||
+	    error == EWOULDBLOCK)
+	) {
+	    error = 0;
+	}
+    }
+    count -= auio.uio_resid;
+    if (res)
+	*res = count;
+    return(error);
+}
+
+/*
+ * fp_stat:
+ *
+ *	Fill *ub with the attributes of fp by way of its fo_stat file
+ *	operation, run on behalf of the current thread.
+ */
+int
+fp_stat(file_t fp, struct stat *ub)
+{
+    int error;
+
+    error = fo_stat(fp, ub, curthread);
+    return(error);
+}
+
+/*
+ * fp_mmap:
+ *
+ *	Map the file behind fp into the current process's address space.
+ *	non-anonymous, non-stack descriptor mappings only!
+ *
+ *	On success the address of the mapping is stored in *resp when
+ *	resp is not NULL.
+ *
+ *	This routine mostly snarfed from vm/vm_mmap.c
+ */
+int
+fp_mmap(void *addr_arg, size_t size, int prot, int flags, struct file *fp,
+    off_t pos, void **resp)
+{
+    struct thread *td = curthread;
+    struct proc *p = td->td_proc;
+    vm_size_t pageoff;
+    vm_prot_t maxprot;
+    vm_offset_t addr;
+    void *handle;
+    int error;
+    vm_object_t obj;
+    struct vmspace *vms = p->p_vmspace;
+    struct vnode *vp;
+    int disablexworkaround;
+
+    prot &= VM_PROT_ALL;
+
+    if ((ssize_t)size < 0 || (flags & MAP_ANON))
+	return(EINVAL);
+
+    pageoff = (pos & PAGE_MASK);
+    pos -= pageoff;
+
+    /* Adjust size for rounding (on both ends). */
+    size += pageoff;				/* low end... */
+    size = (vm_size_t)round_page(size);		/* hi end */
+    addr = (vm_offset_t)addr_arg;
+
+    /*
+     * Check for illegal addresses.  Watch out for address wrap... Note
+     * that VM_*_ADDRESS are not constants due to casts (argh).
+     */
+    if (flags & MAP_FIXED) {
+	/*
+	 * The specified address must have the same remainder
+	 * as the file offset taken modulo PAGE_SIZE, so it
+	 * should be aligned after adjustment by pageoff.
+	 */
+	addr -= pageoff;
+	if (addr & PAGE_MASK)
+	    return (EINVAL);
+	/* Address range must be all in user VM space. */
+	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
+	    return (EINVAL);
+#ifndef i386
+	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+	    return (EINVAL);
+#endif
+	if (addr + size < addr)
+	    return (EINVAL);
+    } else if (addr == 0 ||
+	(addr >= round_page((vm_offset_t)vms->vm_taddr) &&
+	 addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))
+    ) {
+	/*
+	 * XXX for non-fixed mappings where no hint is provided or
+	 * the hint would fall in the potential heap space,
+	 * place it after the end of the largest possible heap.
+	 *
+	 * There should really be a pmap call to determine a reasonable
+	 * location.
+	 */
+	addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
+    }
+
+    /*
+     * Mapping file, get fp for validation. Obtain vnode and make
+     * sure it is of appropriate type.
+     */
+    if (fp->f_type != DTYPE_VNODE)
+	return (EINVAL);
+
+    /*
+     * POSIX shared-memory objects are defined to have
+     * kernel persistence, and are not defined to support
+     * read(2)/write(2) -- or even open(2).  Thus, we can
+     * use MAP_NOSYNC to trade on-disk coherence for speed.
+     * The shm_open(3) library routine turns on the FPOSIXSHM
+     * flag to request this behavior.
+     */
+    if (fp->f_flag & FPOSIXSHM)
+	flags |= MAP_NOSYNC;
+    vp = (struct vnode *) fp->f_data;
+    if (vp->v_type != VREG && vp->v_type != VCHR)
+	return (EINVAL);
+
+    /*
+     * Get the proper underlying object
+     */
+    if (vp->v_type == VREG) {
+	if (VOP_GETVOBJECT(vp, &obj) != 0)
+	    return (EINVAL);
+	vp = (struct vnode*)obj->handle;
+    }
+
+    /*
+     * XXX hack to handle use of /dev/zero to map anon memory (ala
+     * SunOS).
+     */
+    if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+	handle = NULL;
+	maxprot = VM_PROT_ALL;
+	flags |= MAP_ANON;
+	pos = 0;
+    } else {
+	/*
+	 * cdevs do not provide private mappings of any kind.
+	 */
+	/*
+	 * However, for XIG X server to continue to work,
+	 * we should allow the superuser to do it anyway.
+	 * We only allow it at securelevel < 1.
+	 * (Because the XIG X server writes directly to video
+	 * memory via /dev/mem, it should never work at any
+	 * other securelevel.)
+	 * XXX this will have to go
+	 */
+	if (securelevel >= 1)
+	    disablexworkaround = 1;
+	else
+	    disablexworkaround = suser(td);
+	if (vp->v_type == VCHR && disablexworkaround &&
+	    (flags & (MAP_PRIVATE|MAP_COPY))) {
+	    error = EINVAL;
+	    goto done;
+	}
+	/*
+	 * Ensure that file and memory protections are
+	 * compatible.  Note that we only worry about
+	 * writability if mapping is shared; in this case,
+	 * current and max prot are dictated by the open file.
+	 * XXX use the vnode instead?  Problem is: what
+	 * credentials do we use for determination? What if
+	 * proc does a setuid?
+	 */
+	maxprot = VM_PROT_EXECUTE;	/* ??? */
+	if (fp->f_flag & FREAD) {
+	    maxprot |= VM_PROT_READ;
+	} else if (prot & PROT_READ) {
+	    error = EACCES;
+	    goto done;
+	}
+	/*
+	 * If we are sharing potential changes (either via
+	 * MAP_SHARED or via the implicit sharing of character
+	 * device mappings), and we are trying to get write
+	 * permission although we opened it without asking
+	 * for it, bail out.  Check for superuser, only if
+	 * we're at securelevel < 1, to allow the XIG X server
+	 * to continue to work.
+	 */
+
+	if ((flags & MAP_SHARED) != 0 ||
+	    (vp->v_type == VCHR && disablexworkaround)
+	) {
+	    if ((fp->f_flag & FWRITE) != 0) {
+		struct vattr va;
+		if ((error = VOP_GETATTR(vp, &va, td))) {
+		    goto done;
+		}
+		if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) {
+		    maxprot |= VM_PROT_WRITE;
+		} else if (prot & PROT_WRITE) {
+		    error = EPERM;
+		    goto done;
+		}
+	    } else if ((prot & PROT_WRITE) != 0) {
+		error = EACCES;
+		goto done;
+	    }
+	} else {
+	    maxprot |= VM_PROT_WRITE;
+	}
+	handle = (void *)vp;
+    }
+    error = vm_mmap(&vms->vm_map, &addr, size, prot,
+		maxprot, flags, handle, pos);
+    /*
+     * addr is handed to vm_mmap() by reference; report its final value
+     * whenever the caller supplied somewhere to store it.
+     */
+    if (error == 0 && resp)
+	*resp = (void *)addr;
+done:
+    return (error);
+}
+
+/*
+ * fp_close:
+ *
+ *	Release fp through fdrop() on behalf of the current thread.
+ */
+int
+fp_close(file_t fp)
+{
+    return(fdrop(fp, curthread));
+}
+
diff --git a/sys/sys/file.h b/sys/sys/file.h
index a59dfb02c7..a1749e2e30 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -32,7 +32,7 @@
  *
  *	@(#)file.h	8.3 (Berkeley) 1/9/95
  * $FreeBSD: src/sys/sys/file.h,v 1.22.2.7 2002/11/21 23:39:24 sam Exp $
- * $DragonFly: src/sys/sys/file.h,v 1.5 2003/08/27 02:03:22 dillon Exp $
+ * $DragonFly: src/sys/sys/file.h,v 1.6 2003/10/13 18:01:28 dillon Exp $
  */
 
 #ifndef _SYS_FILE_H_
@@ -121,6 +121,14 @@ MALLOC_DECLARE(M_FILE);
 
 
 extern int fdrop (struct file *fp, struct thread *td);
+extern int fp_open(const char *path, int flags, int mode, struct file **fpp);
+extern int fp_read(struct file *fp, void *buf, size_t nbytes, off_t offset, ssize_t *res);
+extern int fp_write(struct file *fp, void *buf, size_t nbytes, off_t offset, ssize_t *res);
+extern int fp_stat(struct file *fp, struct stat *ub);
+extern int fp_mmap(void *addr, size_t size, int prot, int flags, struct file *fp, off_t pos, void **resp);
+
+extern int fp_close(struct file *fp);
+
 extern struct filelist filehead;	/* head of list of open files */
 extern struct fileops vnops;
 extern struct fileops badfileops;
-- 
2.41.0