From f6bf3af1076883df4e7c8e114a0f3d6cffc9b617 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 18 Jan 2004 12:29:50 +0000 Subject: [PATCH] CAPS IPC library stage 1/3: The core CAPS IPC code, providing system calls to create and connect to named rendezvous points. The CAPS interface implements a many-to-1 (client:server) capability and is totally self contained. The messaging is designed to support single and multi-threading, synchronous or asynchronous (as of this commit: polling and synchronous only). Message data is 100% opaque and so while the intention is to integrate it into a userland LWKT messaging subsystem, the actual system calls do not depend on any LWKT structures. Since these system calls are experimental and may contain root holes, they must be enabled via the sysctl kern.caps_enabled. --- sys/conf/files | 5 +- sys/i386/i386/pmap.c | 72 ++- sys/kern/kern_exit.c | 5 +- sys/kern/lwkt_caps.c | 931 ++++++++++++++++++++++++++++++++++ sys/kern/lwkt_thread.c | 4 +- sys/platform/pc32/i386/pmap.c | 72 ++- sys/sys/caps.h | 196 +++++-- sys/sys/msgport.h | 6 +- sys/sys/msgport2.h | 3 +- sys/sys/thread.h | 4 +- sys/vm/pmap.h | 4 +- sys/vm/vm_copy.c | 133 +++++ sys/vm/vm_extern.h | 3 +- 13 files changed, 1372 insertions(+), 66 deletions(-) create mode 100644 sys/kern/lwkt_caps.c create mode 100644 sys/vm/vm_copy.c diff --git a/sys/conf/files b/sys/conf/files index 14c55611d7..e77d383edf 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.41 2004/01/15 18:22:22 joerg Exp $ +# $DragonFly: src/sys/conf/files,v 1.42 2004/01/18 12:29:46 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -642,7 +642,7 @@ kern/kern_switch.c standard kern/lwkt_thread.c standard kern/lwkt_msgport.c standard kern/lwkt_rwlock.c standard 
-#kern/lwkt_caps.c standard +kern/lwkt_caps.c standard kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard @@ -1150,6 +1150,7 @@ vm/default_pager.c standard vm/device_pager.c standard vm/phys_pager.c standard vm/swap_pager.c standard +vm/vm_copy.c standard vm/vm_contig.c standard vm/vm_fault.c standard vm/vm_glue.c standard diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index e5736c145b..59763d8f42 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.26 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.27 2004/01/18 12:29:47 dillon Exp $ */ /* @@ -684,7 +684,44 @@ pmap_extract(pmap_t pmap, vm_offset_t va) return rtval; } return 0; +} + +/* + * Extract user accessible page only, return NULL if the page is not + * present or if its current state is not sufficient. Caller will + * generally call vm_fault() on failure and try again. 
+ */ +vm_page_t +pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot) +{ + vm_offset_t rtval; + vm_offset_t pdirindex; + + pdirindex = va >> PDRSHIFT; + if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { + unsigned *pte; + vm_page_t m; + if ((rtval & PG_PS) != 0) { + if ((rtval & (PG_V|PG_U)) != (PG_V|PG_U)) + return (NULL); + if ((prot & VM_PROT_WRITE) && (rtval & PG_RW) == 0) + return (NULL); + rtval &= ~(NBPDR - 1); + rtval |= va & (NBPDR - 1); + m = PHYS_TO_VM_PAGE(rtval); + } else { + pte = get_ptbase(pmap) + i386_btop(va); + if ((*pte & (PG_V|PG_U)) != (PG_V|PG_U)) + return (NULL); + if ((prot & VM_PROT_WRITE) && (*pte & PG_RW) == 0) + return (NULL); + rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); + m = PHYS_TO_VM_PAGE(rtval); + } + return(m); + } + return (NULL); } /*************************************************** @@ -2664,6 +2701,39 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) crit_exit(); } +/* + * pmap_copy_page_frag: + * + * Copy the physical page from the source PA to the target PA. + * This function may be called from an interrupt. No locking + * is required. 
+ */ +void +pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) +{ + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*(int *) gd->gd_CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*(int *) gd->gd_CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + + *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; + *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; + + cpu_invlpg(gd->gd_CADDR1); + cpu_invlpg(gd->gd_CADDR2); + + bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), + (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), + bytes); + + *(int *) gd->gd_CMAP1 = 0; + *(int *) gd->gd_CMAP2 = 0; + crit_exit(); +} + /* * Routine: pmap_pageable diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 7fe4fea117..8bf57f7de3 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.29 2003/11/21 05:29:04 dillon Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.30 2004/01/18 12:29:49 dillon Exp $ */ #include "opt_compat.h" @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -123,6 +124,8 @@ exit1(int rv) panic("Going nowhere without my init!"); } + caps_exit(p->p_thread); + aio_proc_rundown(p); /* are we a task leader? */ diff --git a/sys/kern/lwkt_caps.c b/sys/kern/lwkt_caps.c new file mode 100644 index 0000000000..a63303c03e --- /dev/null +++ b/sys/kern/lwkt_caps.c @@ -0,0 +1,931 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/kern/lwkt_caps.c,v 1.1 2004/01/18 12:29:49 dillon Exp $ + */ + +/* + * This module implements the DragonFly LWKT IPC rendezvous and message + * passing API which operates between userland processes, between userland + * threads, and between userland processes and kernel threads. This API + * is known as the CAPS interface. + * + * Generally speaking this module abstracts the LWKT message port interface + * into userland Clients and Servers rendezvous through ports named + * by or wildcarded by (name,uid,gid). The kernel provides system calls + * which may be assigned to the mp_* fields in a userland-supplied + * kernel-managed port, and a registration interface which associates an + * upcall with a userland port. The kernel tracks authentication information + * and deals with connection failures by automatically replying to unreplied + * messages. 
+ * + * From the userland perspective a client/server connection involves two + * message ports on the client and two message ports on the server. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int caps_process_msg(caps_kinfo_t caps, caps_kmsg_t msg, struct caps_sys_get_args *uap); +static void caps_free(caps_kinfo_t caps); +static void caps_free_msg(caps_kmsg_t msg); +static caps_kinfo_t caps_free_msg_mcaps(caps_kmsg_t msg); +static caps_kinfo_t kern_caps_sys_service(const char *name, uid_t uid, + gid_t gid, struct ucred *cred, + int flags, int *error); +static caps_kinfo_t kern_caps_sys_client(const char *name, uid_t uid, + gid_t gid, struct ucred *cred, int flags, int *error); + +#define CAPS_HSIZE 64 +#define CAPS_HMASK (CAPS_HSIZE - 1) + +static caps_kinfo_t caps_hash_ary[CAPS_HSIZE]; + +MALLOC_DEFINE(M_CAPS, "caps", "caps IPC messaging"); + +static int caps_enabled; +SYSCTL_INT(_kern, OID_AUTO, caps_enabled, + CTLFLAG_RW, &caps_enabled, 0, "Enable CAPS"); + +/************************************************************************ + * INLINE SUPPORT FUNCTIONS * + ************************************************************************/ + +static __inline +struct caps_kinfo ** +caps_hash(const char *name, int len) +{ + int hv = 0x7123F4B3; + + while (--len >= 0) + hv = (hv << 5) ^ name[len] ^ (hv >> 23); + return(&caps_hash_ary[(hv ^ (hv >> 16)) & CAPS_HMASK]); +} + +static __inline +void +caps_hold(caps_kinfo_t caps) +{ + ++caps->ci_refs; +} + +static __inline +void +caps_drop(caps_kinfo_t caps) +{ + if (--caps->ci_refs == 0) + caps_free(caps); +} + +/************************************************************************ + * STATIC SUPPORT FUNCTIONS * + ************************************************************************/ + +static +caps_kinfo_t +caps_find(const char *name, int len, uid_t uid, gid_t gid) +{ + caps_kinfo_t caps; + struct caps_kinfo **chash; + + chash = 
caps_hash(name, len); + for (caps = *chash; caps; caps = caps->ci_hnext) { + if ((uid == (uid_t)-1 || uid == caps->ci_uid) && + (gid == (gid_t)-1 || gid == caps->ci_gid) && + len == caps->ci_namelen && + bcmp(name, caps->ci_name, len) == 0 + ) { + caps_hold(caps); + break; + } + } + return(caps); +} + +static +caps_kinfo_t +caps_find_id(int id) +{ + thread_t td = curthread; + caps_kinfo_t caps; + + for (caps = td->td_caps; caps; caps = caps->ci_tdnext) { + if (caps->ci_id == id) { + caps_hold(caps); + break; + } + } + return(caps); +} + +static +caps_kinfo_t +caps_alloc(const char *name, int len, uid_t uid, gid_t gid, + int flags, caps_type_t type) +{ + struct caps_kinfo **chash; + thread_t td = curthread; + caps_kinfo_t caps; + caps_kinfo_t ctmp; + + caps = malloc(offsetof(struct caps_kinfo, ci_name[len+1]), + M_CAPS, M_WAITOK|M_ZERO); + TAILQ_INIT(&caps->ci_msgpendq); + TAILQ_INIT(&caps->ci_msguserq); + caps->ci_uid = uid; /* -1 == not registered for uid search */ + caps->ci_gid = gid; /* -1 == not registered for gid search */ + caps->ci_type = type; + caps->ci_refs = 1; /* CAPKF_TDLIST reference */ + caps->ci_namelen = len; + caps->ci_flags = flags; + bcopy(name, caps->ci_name, len + 1); + if (type == CAPT_SERVICE) { + chash = caps_hash(caps->ci_name, len); + caps->ci_hnext = *chash; + *chash = caps; + caps->ci_flags |= CAPKF_HLIST; + } + if (td->td_caps) { + caps->ci_id = td->td_caps->ci_id + 1; + if (caps->ci_id < 0) { + /* + * It is virtually impossible for this case to occur. 
+ */ + caps->ci_id = 1; + while ((ctmp = caps_find_id(caps->ci_id)) != NULL) { + caps_drop(ctmp); + ++caps->ci_id; + } + } + } else { + caps->ci_id = 1; + } + caps->ci_flags |= CAPKF_TDLIST; + caps->ci_tdnext = td->td_caps; + caps->ci_td = td; + td->td_caps = caps; + return(caps); +} + +static +caps_kmsg_t +caps_alloc_msg(caps_kinfo_t caps) +{ + caps_kmsg_t msg; + + msg = malloc(sizeof(struct caps_kmsg), M_CAPS, M_WAITOK|M_ZERO); + msg->km_msgid.c_id = (off_t)(uintptr_t)msg; + return(msg); +} + +static +caps_kmsg_t +caps_find_msg(caps_kinfo_t caps, off_t msgid) +{ + caps_kmsg_t msg; + + TAILQ_FOREACH(msg, &caps->ci_msguserq, km_node) { + if (msg->km_msgid.c_id == msgid) + return(msg); + } + TAILQ_FOREACH(msg, &caps->ci_msgpendq, km_node) { + if (msg->km_msgid.c_id == msgid) + return(msg); + } + return(NULL); +} + +static +caps_kinfo_t +caps_load_ccr(caps_kinfo_t caps, caps_kmsg_t msg, struct proc *p, void *udata, int ubytes) +{ + int i; + struct ucred *cr = p->p_ucred; + caps_kinfo_t rcaps; + + /* + * replace km_mcaps with new VM state, return the old km_mcaps. We + * dereference the old mcap's mrefs but do not drop its main ref count. + * The caller is expected to do that. + */ + rcaps = caps_free_msg_mcaps(msg); /* can be NULL */ + ++caps->ci_mrefs; + caps_hold(caps); + msg->km_mcaps = caps; + msg->km_umsg = udata; + msg->km_umsg_size = ubytes; + + msg->km_ccr.pid = p ? 
p->p_pid : -1; + msg->km_ccr.uid = cr->cr_ruid; + msg->km_ccr.euid = cr->cr_uid; + msg->km_ccr.gid = cr->cr_rgid; + msg->km_ccr.ngroups = MIN(cr->cr_ngroups, CAPS_MAXGROUPS); + for (i = 0; i < msg->km_ccr.ngroups; ++i) + msg->km_ccr.groups[i] = cr->cr_groups[i]; + return(rcaps); +} + +static void +caps_dequeue_msg(caps_kinfo_t caps, caps_kmsg_t msg) +{ + if (msg->km_flags & CAPKMF_ONUSERQ) + TAILQ_REMOVE(&caps->ci_msguserq, msg, km_node); + if (msg->km_flags & CAPKMF_ONPENDQ) + TAILQ_REMOVE(&caps->ci_msgpendq, msg, km_node); + msg->km_flags &= ~(CAPKMF_ONPENDQ|CAPKMF_ONUSERQ); +} + +static void +caps_put_msg(caps_kinfo_t caps, caps_kmsg_t msg, caps_msg_state_t state) +{ + KKASSERT((msg->km_flags & (CAPKMF_ONUSERQ|CAPKMF_ONPENDQ)) == 0); + + msg->km_flags |= CAPKMF_ONPENDQ; + msg->km_flags &= ~CAPKMF_PEEKED; + msg->km_state = state; + TAILQ_INSERT_TAIL(&caps->ci_msgpendq, msg, km_node); + + /* + * Instead of waking up the service for both new messages and disposals, + * just wakeup the service for new messages and it will process the + * previous disposal in the same loop, reducing the number of context + * switches required to run an IPC. + */ + if (state != CAPMS_DISPOSE) + wakeup(caps); + caps_drop(caps); +} + +/* + * caps_free_msg_mcaps() + * + * Free the vmspace reference relating to the data associated with the + * message (this prevents the target process from exiting too early). + * Return and clear km_mcaps. The caller is responsible for dropping the + * reference to the returned caps. + */ +static +caps_kinfo_t +caps_free_msg_mcaps(caps_kmsg_t msg) +{ + caps_kinfo_t mcaps; + + if ((mcaps = msg->km_mcaps) != NULL) { + msg->km_mcaps = NULL; + if (--mcaps->ci_mrefs == 0 && (mcaps->ci_flags & CAPKF_MWAIT)) + wakeup(mcaps); + } + return(mcaps); +} + +/* + * caps_free_msg() + * + * Free a caps placeholder message. The message must not be on any queues. 
+ */ +static void +caps_free_msg(caps_kmsg_t msg) +{ + caps_kinfo_t rcaps; + + if ((rcaps = caps_free_msg_mcaps(msg)) != NULL) + caps_drop(rcaps); + free(msg, M_CAPS); +} + +/* + * caps_term() + * + * Terminate portions of a caps info structure. This is used to close + * an end-point or to flush particular messages on an end-point. + * + * This function should not be called with CAPKF_TDLIST unless the caller + * has an additional hold on the caps structure. + */ +static void +caps_term(caps_kinfo_t caps, int flags, caps_kinfo_t cflush) +{ + struct caps_kinfo **scan; + caps_kmsg_t msg; + + if (flags & CAPKF_TDLIST) + caps->ci_flags |= CAPKF_CLOSED; + + if (flags & CAPKF_FLUSH) { + int mflags; + struct caps_kmsg_queue tmpuserq; + struct caps_kmsg_queue tmppendq; + caps_kinfo_t rcaps; + + TAILQ_INIT(&tmpuserq); + TAILQ_INIT(&tmppendq); + + while ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) != NULL || + (msg = TAILQ_FIRST(&caps->ci_msguserq)) != NULL + ) { + mflags = msg->km_flags & (CAPKMF_ONUSERQ|CAPKMF_ONPENDQ); + caps_dequeue_msg(caps, msg); + + if (cflush && msg->km_mcaps != cflush) { + if (mflags & CAPKMF_ONUSERQ) + TAILQ_INSERT_TAIL(&tmpuserq, msg, km_node); + else + TAILQ_INSERT_TAIL(&tmppendq, msg, km_node); + } else { + /* + * Dispose of the message. If the received message is a + * request we must reply it. If the received message is + * a reply we must return it for disposal. If the + * received message is a disposal request we simply free it. + */ + switch(msg->km_state) { + case CAPMS_REQUEST: + case CAPMS_REQUEST_RETRY: + rcaps = caps_load_ccr(caps, msg, curproc, NULL, 0); + if (rcaps->ci_flags & CAPKF_CLOSED) { + /* + * can't reply, if we never read the message (its on + * the pending queue), or if we are closed ourselves, + * we can just free the message. Otherwise we have + * to send ourselves a disposal request (multi-threaded + * services have to deal with disposal requests for + * messages that might be in progress). 
+ */ + if ((caps->ci_flags & CAPKF_CLOSED) || + (mflags & CAPKMF_ONPENDQ) + ) { + caps_free_msg(msg); + caps_drop(rcaps); + } else { + caps_drop(rcaps); + caps_hold(caps); + caps_put_msg(caps, msg, CAPMS_DISPOSE); + } + } else { + /* + * auto-reply to the originator. + */ + caps_put_msg(rcaps, msg, CAPMS_REPLY); + } + break; + case CAPMS_REPLY: + case CAPMS_REPLY_RETRY: + rcaps = caps_load_ccr(caps, msg, curproc, NULL, 0); + if (caps == rcaps || (rcaps->ci_flags & CAPKF_CLOSED)) { + caps_free_msg(msg); /* degenerate disposal case */ + caps_drop(rcaps); + } else { + caps_put_msg(rcaps, msg, CAPMS_DISPOSE); + } + break; + case CAPMS_DISPOSE: + caps_free_msg(msg); + break; + } + } + } + while ((msg = TAILQ_FIRST(&tmpuserq)) != NULL) { + TAILQ_REMOVE(&tmpuserq, msg, km_node); + TAILQ_INSERT_TAIL(&caps->ci_msguserq, msg, km_node); + msg->km_flags |= CAPKMF_ONUSERQ; + } + while ((msg = TAILQ_FIRST(&tmppendq)) != NULL) { + TAILQ_REMOVE(&tmppendq, msg, km_node); + TAILQ_INSERT_TAIL(&caps->ci_msgpendq, msg, km_node); + msg->km_flags |= CAPKMF_ONPENDQ; + } + } + if ((flags & CAPKF_HLIST) && (caps->ci_flags & CAPKF_HLIST)) { + for (scan = caps_hash(caps->ci_name, caps->ci_namelen); + *scan != caps; + scan = &(*scan)->ci_hnext + ) { + KKASSERT(*scan != NULL); + } + *scan = caps->ci_hnext; + caps->ci_hnext = (void *)-1; + caps->ci_flags &= ~CAPKF_HLIST; + } + if ((flags & CAPKF_TDLIST) && (caps->ci_flags & CAPKF_TDLIST)) { + while (caps->ci_mrefs) { + caps->ci_flags |= CAPKF_MWAIT; + tsleep(caps, 0, "cexit", 0); + } + for (scan = &caps->ci_td->td_caps; + *scan != caps; + scan = &(*scan)->ci_tdnext + ) { + KKASSERT(*scan != NULL); + } + *scan = caps->ci_tdnext; + caps->ci_flags &= ~CAPKF_TDLIST; + caps->ci_tdnext = (void *)-1; + caps->ci_td = NULL; + caps_drop(caps); + } + if ((flags & CAPKF_RCAPS) && (caps->ci_flags & CAPKF_RCAPS)) { + caps_kinfo_t ctmp; + + caps->ci_flags &= ~CAPKF_RCAPS; + if ((ctmp = caps->ci_rcaps)) { + caps->ci_rcaps = NULL; + caps_term(ctmp, CAPKF_FLUSH, 
caps); + caps_drop(ctmp); + } + } +} + +static void +caps_free(caps_kinfo_t caps) +{ + KKASSERT(TAILQ_EMPTY(&caps->ci_msgpendq)); + KKASSERT(TAILQ_EMPTY(&caps->ci_msguserq)); + KKASSERT((caps->ci_flags & (CAPKF_HLIST|CAPKF_TDLIST)) == 0); + free(caps, M_CAPS); +} + +/************************************************************************ + * PROCESS SUPPORT FUNCTIONS * + ************************************************************************/ + +void +caps_fork(struct proc *p1, struct proc *p2) +{ + /* create dummy caps entries that fail? Or dup client entries? XXX */ +} + +void +caps_exit(struct thread *td) +{ + caps_kinfo_t caps; + + while ((caps = td->td_caps) != NULL) { + caps_hold(caps); + caps_term(caps, CAPKF_TDLIST|CAPKF_HLIST|CAPKF_FLUSH|CAPKF_RCAPS, NULL); + caps_drop(caps); + } +} + +/************************************************************************ + * SYSTEM CALLS * + ************************************************************************/ + +/* + * caps_sys_service(name, uid, gid, upcid, flags); + * + * Create an IPC service using the specified name, uid, gid, and flags. + * Either uid or gid can be -1, but not both. The port identifier is + * returned. + * + * upcid can either be an upcall or a kqueue identifier (XXX) + */ +int +caps_sys_service(struct caps_sys_service_args *uap) +{ + struct ucred *cred = curproc->p_ucred; + char name[CAPS_MAXNAMELEN]; + caps_kinfo_t caps; + int len; + int error; + + if (caps_enabled == 0) + return(EOPNOTSUPP); + if ((error = copyinstr(uap->name, name, CAPS_MAXNAMELEN, &len)) != 0) + return(error); + if (--len <= 0) + return(EINVAL); + + caps = kern_caps_sys_service(name, uap->uid, uap->gid, cred, + uap->flags & CAPF_UFLAGS, &error); + if (caps) + uap->sysmsg_result = caps->ci_id; + return(error); +} + +/* + * caps_sys_client(name, uid, gid, upcid, flags); + * + * Create an IPC client connected to the specified service. Either uid or gid + * may be -1, indicating a wildcard, but not both. 
The port identifier is + * returned. + * + * upcid can either be an upcall or a kqueue identifier (XXX) + */ +int +caps_sys_client(struct caps_sys_client_args *uap) +{ + struct ucred *cred = curproc->p_ucred; + char name[CAPS_MAXNAMELEN]; + caps_kinfo_t caps; + int len; + int error; + + if (caps_enabled == 0) + return(EOPNOTSUPP); + if ((error = copyinstr(uap->name, name, CAPS_MAXNAMELEN, &len)) != 0) + return(error); + if (--len <= 0) + return(EINVAL); + + caps = kern_caps_sys_client(name, uap->uid, uap->gid, cred, + uap->flags & CAPF_UFLAGS, &error); + if (caps) + uap->sysmsg_result = caps->ci_id; + return(error); +} + +int +caps_sys_close(struct caps_sys_close_args *uap) +{ + caps_kinfo_t caps; + + if ((caps = caps_find_id(uap->portid)) == NULL) + return(EINVAL); + caps_term(caps, CAPKF_TDLIST|CAPKF_HLIST|CAPKF_FLUSH|CAPKF_RCAPS, NULL); + caps_drop(caps); + return(0); +} + +/* + * caps_sys_put(portid, msg, msgsize) + * + * Send an opaque message of the specified size to the specified port. This + * function may only be used with a client port. The message id is returned. + */ +int +caps_sys_put(struct caps_sys_put_args *uap) +{ + caps_kinfo_t caps; + caps_kmsg_t msg; + struct proc *p = curproc; + + if (uap->msgsize < 0) + return(EINVAL); + if ((caps = caps_find_id(uap->portid)) == NULL) + return(EINVAL); + if (caps->ci_rcaps == NULL) { + caps_drop(caps); + return(EINVAL); + } + + /* + * If this client has queued a large number of messages return + * ENOBUFS. The client must process some replies before it can + * send new messages. The server can also throttle a client by + * holding its replies. XXX allow a server to refuse messages from + * a client. + */ + if (caps->ci_cmsgcount > CAPS_MAXINPROG) { + caps_drop(caps); + return(ENOBUFS); + } + msg = caps_alloc_msg(caps); + uap->sysmsg_offset = msg->km_msgid.c_id; + + /* + * If the remote end is closed reply the message immediately, otherwise + * send it to the remote end. 
Disposal XXX + * + * Note: since this is a new message, caps_load_ccr() returns a remote + * caps of NULL. + */ + if (caps->ci_rcaps->ci_flags & CAPKF_CLOSED) { + caps_load_ccr(caps, msg, p, NULL, 0); /* returns NULL */ + caps_hold(caps); + caps_put_msg(caps, msg, CAPMS_REPLY); /* drops caps */ + } else { + caps_load_ccr(caps, msg, p, uap->msg, uap->msgsize); /* returns NULL */ + caps_hold(caps->ci_rcaps); /* need ref */ + ++caps->ci_cmsgcount; + caps_put_msg(caps->ci_rcaps, msg, CAPMS_REQUEST); /* drops rcaps */ + } + caps_drop(caps); + return(0); +} + +/* + * caps_sys_reply(portid, msg, msgsize, msgid) + * + * Reply to the message referenced by the specified msgid, supplying opaque + * data back to the originator. + */ +int +caps_sys_reply(struct caps_sys_reply_args *uap) +{ + caps_kinfo_t caps; + caps_kinfo_t rcaps; + caps_kmsg_t msg; + struct proc *p; + + if (uap->msgsize < 0) + return(EINVAL); + if ((caps = caps_find_id(uap->portid)) == NULL) + return(EINVAL); + + /* + * Can't find message to reply to + */ + if ((msg = caps_find_msg(caps, uap->msgcid)) == NULL) { + caps_drop(caps); + return(EINVAL); + } + + /* + * Trying to reply to a non-replyable message + */ + if ((msg->km_flags & CAPKMF_ONUSERQ) == 0) { + caps_drop(caps); + return(EINVAL); + } + + /* + * If the remote end is closed requeue to ourselves for disposal. 
+ * Otherwise send the reply to the other end (the other end will + * return a passive DISPOSE to us when it has eaten the data) + */ + caps_dequeue_msg(caps, msg); + p = curproc; + if (msg->km_mcaps->ci_flags & CAPKF_CLOSED) { + caps_drop(caps_load_ccr(caps, msg, p, NULL, 0)); + caps_hold(caps); + caps_put_msg(caps, msg, CAPMS_DISPOSE); /* drops caps */ + } else { + rcaps = caps_load_ccr(caps, msg, p, uap->msg, uap->msgsize); + caps_put_msg(rcaps, msg, CAPMS_REPLY); + } + caps_drop(caps); + return(0); +} + +/* + * caps_sys_get(portid, msg, maxsize, msgid, ccr) + * + * Retrieve the next ready message on the port, store its message id in + * uap->msgid and return the length of the message. If the message is too + * large to fit the message id, length, and creds are still returned, but + * the message is not dequeued (the caller is expected to call again with + * a larger buffer or to reply the messageid if it does not want to handle + * the message). + * + * EWOULDBLOCK is returned if no messages are pending. Note that 0-length + * messages are perfectly acceptable so 0 can be legitimately returned. + */ +int +caps_sys_get(struct caps_sys_get_args *uap) +{ + caps_kinfo_t caps; + caps_kmsg_t msg; + + if (uap->maxsize < 0) + return(EINVAL); + if ((caps = caps_find_id(uap->portid)) == NULL) + return(EINVAL); + if ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) == NULL) { + caps_drop(caps); + return(EWOULDBLOCK); + } + return(caps_process_msg(caps, msg, uap)); +} + +/* + * caps_sys_wait(portid, msg, maxsize, msgid, ccr) + * + * Retrieve the next ready message on the port, store its message id in + * uap->msgid and return the length of the message. If the message is too + * large to fit the message id, length, and creds are still returned, but + * the message is not dequeued (the caller is expected to call again with + * a larger buffer or to reply the messageid if it does not want to handle + * the message). 
+ * + * This function blocks until interrupted or a message is received. + * Note that 0-length messages are perfectly acceptable so 0 can be + * legitimately returned. + */ +int +caps_sys_wait(struct caps_sys_wait_args *uap) +{ + caps_kinfo_t caps; + caps_kmsg_t msg; + int error; + + if (uap->maxsize < 0) + return(EINVAL); + if ((caps = caps_find_id(uap->portid)) == NULL) + return(EINVAL); + while ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) == NULL) { + if ((error = tsleep(caps, PCATCH, "caps", 0)) != 0) { + caps_drop(caps); + return(error); + } + } + return(caps_process_msg(caps, msg, (struct caps_sys_get_args *)uap)); +} + +static int +caps_process_msg(caps_kinfo_t caps, caps_kmsg_t msg, struct caps_sys_get_args *uap) +{ + int error = 0; + int msgsize; + caps_kinfo_t rcaps; + + msg->km_flags |= CAPKMF_PEEKED; + msgsize = msg->km_umsg_size; + if (msgsize <= uap->maxsize) + caps_dequeue_msg(caps, msg); + + if (msg->km_umsg_size != 0) { + struct proc *rp = msg->km_mcaps->ci_td->td_proc; + KKASSERT(rp != NULL); + error = vmspace_copy(rp->p_vmspace, (vm_offset_t)msg->km_umsg, + curproc->p_vmspace, (vm_offset_t)uap->msg, + min(msgsize, uap->maxsize), uap->maxsize); + if (error) { + printf("vmspace_copy: error %d from proc %d\n", error, rp->p_pid); + if (msgsize > uap->maxsize) + caps_dequeue_msg(caps, msg); + msgsize = 0; + error = 0; + } + } + + if (uap->msgid) + error = copyout(&msg->km_msgid, uap->msgid, sizeof(msg->km_msgid)); + if (uap->ccr) + error = copyout(&msg->km_ccr, uap->ccr, sizeof(msg->km_ccr)); + if (error == 0) + uap->sysmsg_result = msgsize; + + /* + * If the message was dequeued we must deal with it. 
+ */ + if (msgsize <= uap->maxsize) { + switch(msg->km_state) { + case CAPMS_REQUEST: + case CAPMS_REQUEST_RETRY: + TAILQ_INSERT_TAIL(&caps->ci_msguserq, msg, km_node); + msg->km_flags |= CAPKMF_ONUSERQ; + break; + case CAPMS_REPLY: + case CAPMS_REPLY_RETRY: + --caps->ci_cmsgcount; + rcaps = caps_load_ccr(caps, msg, curproc, NULL, 0); + if (caps == rcaps || (rcaps->ci_flags & CAPKF_CLOSED)) { + /* degenerate disposal case */ + caps_free_msg(msg); + caps_drop(rcaps); + } else { + caps_put_msg(rcaps, msg, CAPMS_DISPOSE); + } + break; + case CAPMS_DISPOSE: + caps_free_msg(msg); + break; + } + } + caps_drop(caps); + return(error); +} + +/* + * caps_sys_abort(portid, msgcid, flags) + * + * Abort a previously sent message. You must still wait for the message + * to be returned after sending the abort request. This function will + * return the appropriate CAPS_ABORT_* code depending on what it had + * to do. + */ +int +caps_sys_abort(struct caps_sys_abort_args *uap) +{ + uap->sysmsg_result = CAPS_ABORT_NOTIMPL; + return(0); +} + +/* + * KERNEL SYSCALL SEPARATION SUPPORT FUNCTIONS + */ + +static +caps_kinfo_t +kern_caps_sys_service(const char *name, uid_t uid, gid_t gid, + struct ucred *cred, int flags, int *error) +{ + caps_kinfo_t caps; + int len; + + len = strlen(name); + + /* + * Make sure we can use the uid and gid + */ + if (cred) { + if (cred->cr_uid != 0 && uid != (uid_t)-1 && cred->cr_uid != uid) { + *error = EPERM; + return(NULL); + } + if (cred->cr_uid != 0 && gid != (gid_t)-1 && !groupmember(gid, cred)) { + *error = EPERM; + return(NULL); + } + } + + /* + * Handle CAPF_EXCL + */ + if (flags & CAPF_EXCL) { + if ((caps = caps_find(name, strlen(name), uid, gid)) != NULL) { + caps_drop(caps); + *error = EEXIST; + return(NULL); + } + } + + /* + * Create the service + */ + caps = caps_alloc(name, len, uid, gid, flags & CAPF_UFLAGS, CAPT_SERVICE); + return(caps); +} + +static +caps_kinfo_t +kern_caps_sys_client(const char *name, uid_t uid, gid_t gid, + struct ucred 
*cred, int flags, int *error) +{ + caps_kinfo_t caps, rcaps; + int len; + + len = strlen(name); + + /* + * Locate the CAPS service (rcaps ref is for caps->ci_rcaps) + */ + if ((rcaps = caps_find(name, len, uid, gid)) == NULL) { + *error = ENOENT; + return(NULL); + } + + /* + * Check permissions + */ + if (cred) { + *error = EACCES; + if ((flags & CAPF_USER) && (rcaps->ci_flags & CAPF_USER)) { + if (rcaps->ci_uid != (uid_t)-1 && rcaps->ci_uid == cred->cr_uid) + *error = 0; + } + if ((flags & CAPF_GROUP) && (rcaps->ci_flags & CAPF_GROUP)) { + if (rcaps->ci_gid != (gid_t)-1 && groupmember(rcaps->ci_gid, cred)) + *error = 0; + } + if ((flags & CAPF_WORLD) && (rcaps->ci_flags & CAPF_WORLD)) { + *error = 0; + } + if (*error) { + caps_drop(rcaps); + return(NULL); + } + } else { + *error = 0; + } + + /* + * Allocate the client side and connect to the server + */ + caps = caps_alloc(name, len, uid, gid, flags & CAPF_UFLAGS, CAPT_CLIENT); + caps->ci_rcaps = rcaps; + caps->ci_flags |= CAPKF_RCAPS; + return(caps); +} + diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 0dd10baaef..6d50012fbd 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 * - * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.47 2003/12/30 03:19:02 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.48 2004/01/18 12:29:49 dillon Exp $ */ /* @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1308,6 +1309,7 @@ lwkt_exit(void) if (td->td_flags & TDF_VERBOSE) printf("kthread %p %s has exited\n", td, td->td_comm); + caps_exit(td); crit_enter(); lwkt_deschedule_self(); ++mycpu->gd_tdfreecount; diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 5a9b3c3ae0..710474a5e1 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.26 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.27 2004/01/18 12:29:47 dillon Exp $ */ /* @@ -684,7 +684,44 @@ pmap_extract(pmap_t pmap, vm_offset_t va) return rtval; } return 0; +} + +/* + * Extract user accessible page only, return NULL if the page is not + * present or if its current state is not sufficient. Caller will + * generally call vm_fault() on failure and try again. 
+ */ +vm_page_t +pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot) +{ + vm_offset_t rtval; + vm_offset_t pdirindex; + + pdirindex = va >> PDRSHIFT; + if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { + unsigned *pte; + vm_page_t m; + if ((rtval & PG_PS) != 0) { + if ((rtval & (PG_V|PG_U)) != (PG_V|PG_U)) + return (NULL); + if ((prot & VM_PROT_WRITE) && (rtval & PG_RW) == 0) + return (NULL); + rtval &= ~(NBPDR - 1); + rtval |= va & (NBPDR - 1); + m = PHYS_TO_VM_PAGE(rtval); + } else { + pte = get_ptbase(pmap) + i386_btop(va); + if ((*pte & (PG_V|PG_U)) != (PG_V|PG_U)) + return (NULL); + if ((prot & VM_PROT_WRITE) && (*pte & PG_RW) == 0) + return (NULL); + rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); + m = PHYS_TO_VM_PAGE(rtval); + } + return(m); + } + return (NULL); } /*************************************************** @@ -2664,6 +2701,39 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) crit_exit(); } +/* + * pmap_copy_page_frag: + * + * Copy the physical page from the source PA to the target PA. + * This function may be called from an interrupt. No locking + * is required. 
+ */ +void +pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) +{ + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*(int *) gd->gd_CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*(int *) gd->gd_CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + + *(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; + *(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; + + cpu_invlpg(gd->gd_CADDR1); + cpu_invlpg(gd->gd_CADDR2); + + bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), + (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), + bytes); + + *(int *) gd->gd_CMAP1 = 0; + *(int *) gd->gd_CMAP2 = 0; + crit_exit(); +} + /* * Routine: pmap_pageable diff --git a/sys/sys/caps.h b/sys/sys/caps.h index 8327924db3..813060c998 100644 --- a/sys/sys/caps.h +++ b/sys/sys/caps.h @@ -3,7 +3,7 @@ * * Implements an architecture independant Capability Service API * - * $DragonFly: src/sys/sys/caps.h,v 1.1 2003/11/24 21:15:54 dillon Exp $ + * $DragonFly: src/sys/sys/caps.h,v 1.2 2004/01/18 12:29:50 dillon Exp $ */ #ifndef _SYS_CAPS_H_ @@ -16,78 +16,170 @@ #include #endif -#define CAPS_USER 0x00000001 -#define CAPS_GROUP 0x00000002 -#define CAPS_WORLD 0x00000004 -#define CAPS_EXCL 0x00000008 -#define CAPS_ANYCLIENT (CAPS_USER|CAPS_GROUP|CAPS_WORLD) -#define CAPS_WCRED 0x00000010 /* waiting for cred */ +typedef enum caps_msg_state { + CAPMS_REQUEST, + CAPMS_REQUEST_RETRY, /* internal / FUTURE */ + CAPMS_REPLY, + CAPMS_REPLY_RETRY, /* internal / FUTURE */ + CAPMS_DISPOSE +} caps_msg_state_t; + +typedef struct caps_msgid { + off_t c_id; + caps_msg_state_t c_state; + int c_reserved01; +} *caps_msgid_t; /* - * caps_type associated with caps_port: - * - * CAPT_CLIENT port returned to client representing connection to - * service. 
- * CAPT_SERVICE port returned to service representing namespace - * CAPT_REMOTE temporary port used by service to represent - * client connections to service (set as replyport for - * messages) - * + * Note: upper 16 bits reserved for kernel use + */ +#define CAPF_UFLAGS 0xFFFF +#define CAPF_USER 0x0001 +#define CAPF_GROUP 0x0002 +#define CAPF_WORLD 0x0004 +#define CAPF_EXCL 0x0008 +#define CAPF_ANYCLIENT (CAPF_USER|CAPF_GROUP|CAPF_WORLD) +#define CAPF_WCRED 0x0010 /* waiting for cred */ +/* FUTURE: CAPF_ASYNC - support async services */ +/* FUTURE: CAPF_NOGROUPS - don't bother filling in the groups[] array */ +/* FUTURE: CAPF_TERM - send termination request to existing service */ +/* FUTURE: CAPF_TAKE - take over existing service's connections */ +/* FUTURE: CAPF_DISPOSE_IMM - need immediate dispose wakeups */ + +/* + * Abort codes */ -enum caps_type { CAPT_UNKNOWN, CAPT_CLIENT, CAPT_SERVICE, CAPT_REMOTE }; +#define CAPS_ABORT_NOTIMPL 0 /* abort not implemented, no action */ +#define CAPS_ABORT_RETURNED 1 /* already returned, no action */ +#define CAPS_ABORT_BEFORESERVER 2 /* caught before the server got it */ +#define CAPS_ABORT_ATSERVER 3 /* server had retrieved message */ + +#define CAPF_ABORT_HARD 0x0001 /* rip out from under server (3) */ #define CAPS_MAXGROUPS 16 +#define CAPS_MAXNAMELEN 64 +#define CAPS_MAXINPROG 128 struct thread; -struct caps_port; -typedef struct caps_port *caps_port_t; +typedef struct caps_port { + struct lwkt_port cp_lport; + int cp_portid; /* caps port id */ + int cp_upcallid; /* upcall id */ +} *caps_port_t; -struct caps_cred { +typedef struct caps_cred { pid_t pid; uid_t uid; uid_t euid; gid_t gid; int ngroups; + int cacheid; gid_t groups[CAPS_MAXGROUPS]; -}; - -struct caps_port { - struct lwkt_port lport; - caps_port_t server; /* if CAPT_REMOTE, pointer to server */ - enum caps_type type; - int kqfd; /* kqueue to collect active connects */ - int lfd; /* server: listening on (server) */ - int cfd; /* client/remote connection fd */ 
- int flags; - TAILQ_HEAD(, caps_port) clist; /* server: client client connections */ - TAILQ_ENTRY(caps_port) centry; - TAILQ_HEAD(, lwkt_msg) wlist; /* queue of outgoing messages */ - TAILQ_HEAD(, lwkt_msg) mlist; /* written message waiting for reply */ - struct lwkt_msg rmsg_static; - lwkt_msg_t rmsg; /* read message in progress */ - struct caps_cred cred; /* cred of owner of port */ - int rbytes; /* read in progress byte count */ - int wbytes; /* write in progress byte count */ -}; - -#define CAPPF_WAITCRED 0x0001 -#define CAPPF_ONLIST 0x0002 -#define CAPPF_WREQUESTED 0x0004 /* write event requested */ -#define CAPPF_SHUTDOWN 0x0008 /* terminated/failed */ - -#define CAPMSG_MAXSIZE (1024+64*1024) +} *caps_cred_t; + +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + +typedef enum caps_type { + CAPT_UNKNOWN, CAPT_CLIENT, CAPT_SERVICE, CAPT_REMOTE +} caps_type_t; + +struct caps_kmsg; + +TAILQ_HEAD(caps_kmsg_queue, caps_kmsg); + +/* + * caps_kinfo - Holds a client or service registration + * + * ci_msgpendq: holds the kernel copy of the message after it has been + * sent to the local port. The message is matched up against + * replies and automatically replied if the owner closes its + * connection. 
+ */ +typedef struct caps_kinfo { + struct lwkt_port ci_lport; /* embedded local port */ + struct caps_kinfo *ci_tdnext; /* per-process list */ + struct caps_kinfo *ci_hnext; /* registration hash table */ + struct thread *ci_td; /* owner */ + struct caps_kmsg_queue ci_msgpendq; /* pending reply (just rcvd) */ + struct caps_kmsg_queue ci_msguserq; /* pending reply (user holds) */ + struct caps_kinfo *ci_rcaps; /* connected to remote */ + int ci_cmsgcount; /* client in-progress msgs */ + int ci_id; + int ci_flags; + int ci_refs; + int ci_mrefs; /* message (vmspace) refs */ + caps_type_t ci_type; + uid_t ci_uid; + gid_t ci_gid; + int ci_namelen; + char ci_name[4]; /* variable length */ + /* ci_name must be last element */ +} *caps_kinfo_t; + +/* note: user flags are held in the low 16 bits */ +#define CAPKF_TDLIST 0x00010000 +#define CAPKF_HLIST 0x00020000 +#define CAPKF_FLUSH 0x00040000 +#define CAPKF_RCAPS 0x00080000 +#define CAPKF_CLOSED 0x00100000 +#define CAPKF_MWAIT 0x00200000 /* - * API + * Kernel caps message. The kernel keeps track of messages received, + * undergoing processing by the service, and returned. User-supplied data + * is copied on reception rather than transmission. */ -caps_port_t caps_service(const char *name, gid_t gid, mode_t modes, int flags); -caps_port_t caps_client(const char *name, uid_t uid, int flags); +typedef struct caps_kmsg { + TAILQ_ENTRY(caps_kmsg) km_node; + caps_kinfo_t km_mcaps; /* message sender */ + void *km_umsg; /* mcaps vmspace */ + int km_umsg_size; /* mcaps vmspace */ + struct caps_cred km_ccr; /* caps cred for msg */ + struct caps_msgid km_msgid; + int km_flags; +} *caps_kmsg_t; + +#define km_state km_msgid.c_state + +#define CAPKMF_ONUSERQ 0x0001 +#define CAPKMF_ONPENDQ 0x0002 +#define CAPKMF_REPLY 0x0004 +#define CAPKMF_CDONE 0x0008 +#define CAPKMF_PEEKED 0x0010 +#define CAPKMF_ABORTED 0x0020 + +#endif + +#ifdef _KERNEL /* - * Temporary hack until LWKT threading is integrated. 
+ * kernel support */ -void *caps_client_waitreply(caps_port_t port, lwkt_msg_t msg); +void caps_exit(struct thread *td); +void caps_fork(struct proc *p1, struct proc *p2); + +#else + +/* + * Userland API (libcaps) + */ +caps_port_t caps_service(const char *name, uid_t uid, gid_t gid, + mode_t modes, int flags); +caps_port_t caps_client(const char *name, uid_t uid, gid_t gid, int flags); + +/* + * Syscall API + */ +int caps_sys_service(const char *name, uid_t uid, gid_t gid, int upcid, int flags); +int caps_sys_client(const char *name, uid_t uid, gid_t gid, int upcid, int flags); +off_t caps_sys_put(int portid, void *msg, int msgsize); +int caps_sys_reply(int portid, void *msg, int msgsize, off_t msgcid); +int caps_sys_get(int portid, void *msg, int maxsize, caps_msgid_t msgid, caps_cred_t ccr); +int caps_sys_wait(int portid, void *msg, int maxsize, caps_msgid_t msgid, caps_cred_t ccr); +int caps_sys_abort(int portid, off_t msgcid, int flags); + +#endif #endif diff --git a/sys/sys/msgport.h b/sys/sys/msgport.h index ec9f7fc3ba..8fa61aef72 100644 --- a/sys/sys/msgport.h +++ b/sys/sys/msgport.h @@ -3,7 +3,7 @@ * * Implements LWKT messages and ports. 
* - * $DragonFly: src/sys/sys/msgport.h,v 1.11 2003/11/24 20:46:05 dillon Exp $ + * $DragonFly: src/sys/sys/msgport.h,v 1.12 2004/01/18 12:29:50 dillon Exp $ */ #ifndef _SYS_MSGPORT_H_ @@ -41,11 +41,11 @@ typedef struct lwkt_msg { } opaque; lwkt_port_t ms_target_port; /* current target or relay port */ lwkt_port_t ms_reply_port; /* asynch replies returned here */ - int ms_maxsize; /* maximum returned message size */ + int ms_unused1; int ms_cmd; /* message command */ int ms_flags; /* message flags */ #define ms_copyout_start ms_msgsize - int ms_msgsize; /* sent/returned size of message */ + int ms_msgsize; /* size of message */ int ms_error; /* positive error code or 0 */ union { void *ms_resultp; /* misc pointer data or result */ diff --git a/sys/sys/msgport2.h b/sys/sys/msgport2.h index d479f0dab0..3ae47ed828 100644 --- a/sys/sys/msgport2.h +++ b/sys/sys/msgport2.h @@ -3,7 +3,7 @@ * * Implements Inlines for LWKT messages and ports. * - * $DragonFly: src/sys/sys/msgport2.h,v 1.5 2003/11/24 20:46:05 dillon Exp $ + * $DragonFly: src/sys/sys/msgport2.h,v 1.6 2004/01/18 12:29:50 dillon Exp $ */ #ifndef _SYS_MSGPORT2_H_ @@ -17,7 +17,6 @@ lwkt_initmsg(lwkt_msg_t msg, lwkt_port_t rport, int cmd) msg->ms_flags = MSGF_DONE; msg->ms_reply_port = rport; msg->ms_msgsize = 0; - msg->ms_maxsize = 0; } static __inline diff --git a/sys/sys/thread.h b/sys/sys/thread.h index 9cbe1774c5..bf12f9645b 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -7,7 +7,7 @@ * Types which must already be defined when this header is included by * userland: struct md_thread * - * $DragonFly: src/sys/sys/thread.h,v 1.38 2003/12/07 04:20:38 dillon Exp $ + * $DragonFly: src/sys/sys/thread.h,v 1.39 2004/01/18 12:29:50 dillon Exp $ */ #ifndef _SYS_THREAD_H_ @@ -136,6 +136,7 @@ typedef struct lwkt_rwlock { * but this does not effect how the thread is scheduled by LWKT. 
*/ struct md_intr_info; +struct caps_kinfo; struct thread { TAILQ_ENTRY(thread) td_threadq; @@ -172,6 +173,7 @@ struct thread { #endif char td_comm[MAXCOMLEN+1]; /* typ 16+1 bytes */ struct thread *td_preempted; /* we preempted this thread */ + struct caps_kinfo *td_caps; /* list of client and server registrations */ struct md_thread td_mach; }; diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 2f82deec85..a386e0ae39 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/pmap.h,v 1.33.2.4 2002/03/06 22:44:24 silby Exp $ - * $DragonFly: src/sys/vm/pmap.h,v 1.8 2003/11/03 17:11:23 dillon Exp $ + * $DragonFly: src/sys/vm/pmap.h,v 1.9 2004/01/18 12:29:50 dillon Exp $ */ /* @@ -102,10 +102,12 @@ void pmap_collect (void); void pmap_copy (pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); void pmap_copy_page (vm_paddr_t, vm_paddr_t); +void pmap_copy_page_frag (vm_paddr_t, vm_paddr_t, size_t bytes); void pmap_destroy (pmap_t); void pmap_enter (pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); vm_paddr_t pmap_extract (pmap_t pmap, vm_offset_t va); +vm_page_t pmap_extract_vmpage (pmap_t pmap, vm_offset_t va, int prot); void pmap_growkernel (vm_offset_t); void pmap_init (vm_paddr_t, vm_paddr_t); boolean_t pmap_is_modified (vm_page_t m); diff --git a/sys/vm/vm_copy.c b/sys/vm/vm_copy.c new file mode 100644 index 0000000000..b5008becb3 --- /dev/null +++ b/sys/vm/vm_copy.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vm/Attic/vm_copy.c,v 1.1 2004/01/18 12:29:50 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Perform a generic copy between two vm_map's. + * + * This code is intended to eventually replace vm_uiomove() and is already + * used by lwkt_caps.c + * + * XXX do COW page optimizations if possible when allowed by page alignment + * and maxbytes. maxbytes - bytes represents slop space in the target + * buffer that can be junked (or invalidated) by the copy. 
+ */ +int +vmspace_copy(struct vmspace *svm, vm_offset_t saddr, + struct vmspace *dvm, vm_offset_t daddr, + ssize_t bytes, ssize_t maxbytes) +{ +#ifdef NEW_VMSPACE + vm_paddr_t pa1, pa2; +#else + vm_page_t m1, m2; +#endif + int rv; + + if (bytes == 0) + return(0); + if (maxbytes < bytes) + maxbytes = bytes; + KKASSERT(bytes > 0); + + while (bytes) { + int n; + + n = bytes; + if (n > PAGE_SIZE - (saddr & PAGE_MASK)) + n = PAGE_SIZE - (saddr & PAGE_MASK); + if (n > PAGE_SIZE - (daddr & PAGE_MASK)) + n = PAGE_SIZE - (daddr & PAGE_MASK); + + /* + * Wire and copy on a page-by-page basis. There are more efficient + * ways of doing this, but this is 'safe'. + */ +#ifdef NEW_VMSPACE + rv = vm_fault_wire(&svm->vm_map, saddr, saddr + n); + if (rv != KERN_SUCCESS) + return(EFAULT); + rv = vm_fault_wire(&dvm->vm_map, daddr, daddr + n); + if (rv != KERN_SUCCESS) { + vm_fault_unwire(&svm->vm_map, saddr, saddr + n); + return(EFAULT); + } + pa1 = pmap_extract(&svm->vm_pmap, saddr); + pa2 = pmap_extract(&dvm->vm_pmap, daddr); + pmap_copy_page_frag(pa1, pa2, n); + vm_fault_unwire(&svm->vm_map, saddr, saddr + n); + vm_fault_unwire(&dvm->vm_map, daddr, daddr + n); +#else + for (;;) { + m1 = pmap_extract_vmpage(&svm->vm_pmap, saddr, VM_PROT_READ); + if (m1 == NULL) { + rv = vm_fault(&svm->vm_map, saddr, VM_PROT_READ, VM_FAULT_NORMAL); + if (rv != KERN_SUCCESS) + return(EFAULT); + continue; + } + m2 = pmap_extract_vmpage(&dvm->vm_pmap, daddr, VM_PROT_WRITE); + if (m2 == NULL) { + rv = vm_fault(&dvm->vm_map, daddr, VM_PROT_WRITE, VM_FAULT_NORMAL); + if (rv != KERN_SUCCESS) + return(EFAULT); + continue; + } + break; + } + + pmap_copy_page_frag(m1->phys_addr | (saddr & PAGE_MASK), + m2->phys_addr | (daddr & PAGE_MASK), n); +#endif + bytes -= n; + saddr += n; + daddr += n; + } + return(0); +} + diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index b466283d8a..2231c2f5d5 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -32,7 +32,7 @@ * * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 
* $FreeBSD: src/sys/vm/vm_extern.h,v 1.46.2.3 2003/01/13 22:51:17 dillon Exp $ - * $DragonFly: src/sys/vm/vm_extern.h,v 1.7 2004/01/14 23:26:14 dillon Exp $ + * $DragonFly: src/sys/vm/vm_extern.h,v 1.8 2004/01/18 12:29:50 dillon Exp $ */ #ifndef _VM_EXTERN_H_ @@ -78,6 +78,7 @@ int swaponvp (struct thread *, struct vnode *, dev_t , u_long); void swapout_procs (int); int useracc (caddr_t, int, int); int vm_fault (vm_map_t, vm_offset_t, vm_prot_t, int); +int vmspace_copy (struct vmspace *, vm_offset_t, struct vmspace *, vm_offset_t, ssize_t, ssize_t); void vm_fault_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t); void vm_fault_unwire (vm_map_t, vm_offset_t, vm_offset_t); int vm_fault_wire (vm_map_t, vm_offset_t, vm_offset_t); -- 2.41.0