author     Matthew Dillon <dillon@dragonflybsd.org>
           Thu, 1 Apr 2004 17:58:08 +0000 (17:58 +0000)
committer  Matthew Dillon <dillon@dragonflybsd.org>
           Thu, 1 Apr 2004 17:58:08 +0000 (17:58 +0000)

Enhance the pmap_kenter*() API and friends, separating out entries which
only need invalidation on the local cpu from entries which need invalidation
across the entire system, and provide a synchronization abstraction.
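
The new entry points (declared in sys/vm/pmap.h below) pair each operation
with a local-cpu-only variant.  A minimal usage sketch, with va and pa
standing for caller-supplied values:

    /*
     * Enter a kernel mapping that initially only needs to be valid on
     * the current cpu, then synchronize it when other cpus must see it.
     */
    pmap_kenter_quick(va, pa);      /* local TLB invalidation only */
    /* ... use the mapping on this cpu ... */
    pmap_kenter_sync(va);           /* invalidate va on all cpus */
    /* ... mapping now usable from any cpu ... */
    pmap_kremove_quick(va);         /* remove it, local invalidation only */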

Enhance sf_buf_alloc() and friends to allow the caller to specify whether the
sf_buf's kernel mapping is going to be used on just the current cpu or
whether it needs to be valid across all cpus.  This is done by maintaining
a cpumask of known-synchronized cpus in the struct sf_buf.
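
On a cache hit the synchronization test reduces to a mask check, abridged
here from the kern_sfbuf.c hunk below:

    if ((flags & SFBA_QUICK) && sfbuf_quick) {
            /* caller only needs the mapping on the local cpu */
            if ((sf->cpumask & gd->gd_cpumask) == 0) {
                    pmap_kenter_sync_quick(sf->kva);
                    sf->cpumask |= gd->gd_cpumask;
            }
    } else {
            /* caller needs the mapping everywhere; sync if not synced */
            if (sf->cpumask != (cpumask_t)-1) {
                    pmap_kenter_sync(sf->kva);
                    sf->cpumask = (cpumask_t)-1;
            }
    }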

Optimize sf_buf_alloc() and friends by removing both TAILQ operations from
the critical path.  The TAILQ operation that removes the sf_buf from the
free queue is now done lazily.  Most sf_buf operations allocate a buf,
work on it, and free it, so why waste time taking the sf_buf off the
freelist if we are only going to put it back on a microsecond later?
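
Concretely, sf_buf_free() leaves a zero-refcnt buf on the freelist (flagged
SFBA_ONFREEQ), and the allocator prunes entries revived by a cache hit only
when it actually needs a fresh buf.  Abridged from the hunks below:

    /* free side: queue the buf only if it is not already queued */
    if (sf->refcnt == 0 && (sf->flags & SFBA_ONFREEQ) == 0) {
            TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
            sf->flags |= SFBA_ONFREEQ;
    }

    /* alloc side: pop bufs, skipping any still referenced */
    KKASSERT(sf->flags & SFBA_ONFREEQ);
    TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
    sf->flags &= ~SFBA_ONFREEQ;
    if (sf->refcnt == 0)
            break;          /* found a genuinely free buf to reuse */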

Fix a bug in the sf_buf_alloc() code as used by the PIPE code.
sf_buf_alloc() was unconditionally using PCATCH in its tsleep() call, which
is only correct when called from the sendfile() interface.
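
The fix makes the interruptible sleep opt-in via a new SFBA_PCATCH flag,
which only the sendfile() path passes (from the patch below):

    pflags = (flags & SFBA_PCATCH) ? PCATCH : 0;
    ++sf_buf_alloc_want;
    error = tsleep(&sf_buf_freelist, pflags, "sfbufa", 0);
    --sf_buf_alloc_want;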

Optimize the PIPE code to require only local cpu_invlpg()'s when mapping
sf_buf's, greatly reducing the number of IPIs required.  On a DELL-2550,
a pipe test which explicitly blows out the sf_buf cache by using huge
buffers improves from 350 to 550 MBytes/sec.  However, note that buildworld
times were not found to have changed.
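
In practice this just means the pipe's copy helpers map each page with
SFBA_QUICK, as in the kern_xio.c hunks below:

    /* map the page for a local-cpu-only copy; no cross-cpu IPIs needed */
    sf = sf_buf_alloc(m, SFBA_QUICK);
    bcopy((char *)sf_buf_kva(sf) + offset, kptr, n);
    sf_buf_free(sf);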

Replace the PIPE code's custom 'struct pipemapping' structure with a
struct xio and use the XIO API functions rather than its own.
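
The direct-write path now wraps the writer's user buffer in an XIO and the
reader drains it with the generic copy helpers.  Abridged from the
sys_pipe.c hunks below:

    /* writer: hold the user pages behind an XIO (read direction) */
    error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
                          size, XIOF_READ);

    /* reader: copy out of the held pages; the page holds are dropped
       later via xio_release() once the transfer completes or aborts */
    error = xio_uio_copy(&rpipe->pipe_map, uio, &size);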

sys/i386/i386/pmap.c
sys/i386/i386/uio_machdep.c
sys/kern/kern_sfbuf.c
sys/kern/kern_xio.c
sys/kern/sys_pipe.c
sys/kern/uipc_syscalls.c
sys/platform/pc32/i386/pmap.c
sys/platform/pc32/i386/uio_machdep.c
sys/sys/pipe.h
sys/sys/sfbuf.h
sys/vm/pmap.h

diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 367cb49..a69f9c7 100644
@@ -40,7 +40,7 @@
  *
  *     from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.32 2004/03/01 06:33:16 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.33 2004/04/01 17:58:00 dillon Exp $
  */
 
 /*
@@ -689,7 +689,7 @@ pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot)
  * note that in order for the mapping to take effect -- you
  * should do a invltlb after doing the pmap_kenter...
  */
-PMAP_INLINE void 
+void 
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
        unsigned *pte;
@@ -704,10 +704,38 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
        pmap_inval_flush(&info);
 }
 
+void
+pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
+{
+       unsigned *pte;
+       unsigned npte;
+
+       npte = pa | PG_RW | PG_V | pgeflag;
+       pte = (unsigned *)vtopte(va);
+       *pte = npte;
+       cpu_invlpg((void *)va);
+}
+
+void
+pmap_kenter_sync(vm_offset_t va)
+{
+       pmap_inval_info info;
+
+       pmap_inval_init(&info);
+       pmap_inval_add(&info, kernel_pmap, va);
+       pmap_inval_flush(&info);
+}
+
+void
+pmap_kenter_sync_quick(vm_offset_t va)
+{
+       cpu_invlpg((void *)va);
+}
+
 /*
  * remove a page from the kernel pagetables
  */
-PMAP_INLINE void
+void
 pmap_kremove(vm_offset_t va)
 {
        unsigned *pte;
@@ -720,6 +748,15 @@ pmap_kremove(vm_offset_t va)
        pmap_inval_flush(&info);
 }
 
+void
+pmap_kremove_quick(vm_offset_t va)
+{
+       unsigned *pte;
+       pte = (unsigned *)vtopte(va);
+       *pte = 0;
+       cpu_invlpg((void *)va);
+}
+
 /*
  *     Used to map a range of physical addresses into kernel
  *     virtual address space.
@@ -870,7 +907,7 @@ pmap_swapout_proc(struct proc *p)
                        panic("pmap_swapout_proc: upage already missing???");
                vm_page_dirty(m);
                vm_page_unwire(m, 0);
-               pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
+               pmap_kremove((vm_offset_t)p->p_addr + (PAGE_SIZE * i));
        }
 #endif
 }
@@ -891,7 +928,7 @@ pmap_swapin_proc(struct proc *p)
 
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
-               pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
+               pmap_kenter((vm_offset_t)p->p_addr + (i * PAGE_SIZE),
                        VM_PAGE_TO_PHYS(m));
 
                if (m->valid != VM_PAGE_BITS_ALL) {
@@ -1001,7 +1038,7 @@ pmap_pinit0(struct pmap *pmap)
 {
        pmap->pm_pdir =
                (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
-       pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
+       pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
        pmap->pm_count = 1;
        pmap->pm_active = 0;
        pmap->pm_ptphint = NULL;
@@ -1046,7 +1083,7 @@ pmap_pinit(struct pmap *pmap)
        vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
        ptdpg->valid = VM_PAGE_BITS_ALL;
 
-       pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
+       pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
        if ((ptdpg->flags & PG_ZERO) == 0)
                bzero(pmap->pm_pdir, PAGE_SIZE);
 
@@ -1107,7 +1144,7 @@ pmap_release_free_page(struct pmap *pmap, vm_page_t p)
                bzero(pde + KPTDI, nkpt * PTESIZE);
                pde[MPPTDI] = 0;
                pde[APTDPTDI] = 0;
-               pmap_kremove((vm_offset_t) pmap->pm_pdir);
+               pmap_kremove((vm_offset_t)pmap->pm_pdir);
        }
 
        if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
diff --git a/sys/i386/i386/uio_machdep.c b/sys/i386/i386/uio_machdep.c
index fa96aa3..545ba01 100644
@@ -38,7 +38,7 @@
  *
  * @(#)kern_subr.c     8.3 (Berkeley) 1/21/94
  * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
- * $DragonFly: src/sys/i386/i386/Attic/uio_machdep.c,v 1.2 2004/03/29 15:46:15 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/uio_machdep.c,v 1.3 2004/04/01 17:58:00 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -93,7 +93,7 @@ uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, int n, struct uio *uio)
                page_offset = offset & PAGE_MASK;
                cnt = min(cnt, PAGE_SIZE - page_offset);
                m = ma[offset >> PAGE_SHIFT];
-               sf = sf_buf_alloc(m);
+               sf = sf_buf_alloc(m, SFBA_QUICK);
                cp = (char *)sf_buf_kva(sf) + page_offset;
                switch (uio->uio_segflg) {
                case UIO_USERSPACE:
diff --git a/sys/kern/kern_sfbuf.c b/sys/kern/kern_sfbuf.c
index b501900..d2d46f3 100644
@@ -22,7 +22,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/kern/kern_sfbuf.c,v 1.2 2004/03/29 15:46:18 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_sfbuf.c,v 1.3 2004/04/01 17:58:02 dillon Exp $
  */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/sfbuf.h>
+#include <sys/globaldata.h>
+#include <sys/thread.h>
+#include <sys/sysctl.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
+#include <sys/thread2.h>
 
 static void sf_buf_init(void *arg);
 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
@@ -57,6 +61,9 @@ static u_int sf_buf_alloc_want;
 static vm_offset_t sf_base;
 static struct sf_buf *sf_bufs;
 
+static int sfbuf_quick = 1;
+SYSCTL_INT(_debug, OID_AUTO, sfbuf_quick, CTLFLAG_RW, &sfbuf_quick, 0, "");
+
 /*
  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
  */
@@ -72,6 +79,7 @@ sf_buf_init(void *arg)
            M_NOWAIT | M_ZERO);
        for (i = 0; i < nsfbufs; i++) {
                sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+               sf_bufs[i].flags |= SFBA_ONFREEQ;
                TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
        }
 }
@@ -80,48 +88,95 @@ sf_buf_init(void *arg)
  * Get an sf_buf from the freelist. Will block if none are available.
  */
 struct sf_buf *
-sf_buf_alloc(struct vm_page *m)
+sf_buf_alloc(struct vm_page *m, int flags)
 {
        struct sf_buf_list *hash_chain;
        struct sf_buf *sf;
-       int s;
+       globaldata_t gd;
        int error;
+       int pflags;
 
-       s = splimp();
+       gd = mycpu;
+       crit_enter();
        hash_chain = &sf_buf_hashtable[SF_BUF_HASH(m)];
        LIST_FOREACH(sf, hash_chain, list_entry) {
                if (sf->m == m) {
-                       if (sf->refcnt == 0) {
-                               /* reclaim cached entry off freelist */
-                               TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
-                       }
+                       /*
+                        * cache hit
+                        *
+                        * We must invalidate the TLB entry based on whether
+                        * it need only be valid on the local cpu (SFBA_QUICK),
+                        * or on all cpus.  This is conditionalized and in
+                        * most cases no system-wide invalidation should be
+                        * needed.
+                        *
+                        * Note: we do not remove the entry from the freelist
+                        * on the 0->1 transition. 
+                        */
                        ++sf->refcnt;
+                       if ((flags & SFBA_QUICK) && sfbuf_quick) {
+                               if ((sf->cpumask & gd->gd_cpumask) == 0) {
+                                       pmap_kenter_sync_quick(sf->kva);
+                                       sf->cpumask |= gd->gd_cpumask;
+                               }
+                       } else {
+                               if (sf->cpumask != (cpumask_t)-1) {
+                                       pmap_kenter_sync(sf->kva);
+                                       sf->cpumask = (cpumask_t)-1;
+                               }
+                       }
                        goto done;      /* found existing mapping */
                }
        }
 
        /*
-        * Didn't find old mapping.  Get a buffer off the freelist.
+        * Didn't find old mapping.  Get a buffer off the freelist.  We
+        * may have to remove and skip buffers with non-zero ref counts 
+        * that were lazily allocated.
         */
-       while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
-               ++sf_buf_alloc_want;
-               error = tsleep(&sf_buf_freelist, PCATCH, "sfbufa", 0);
-               --sf_buf_alloc_want;
-
-               /* If we got a signal, don't risk going back to sleep. */
-               if (error)
-                       goto done;
+       for (;;) {
+               if ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
+                       pflags = (flags & SFBA_PCATCH) ? PCATCH : 0;
+                       ++sf_buf_alloc_want;
+                       error = tsleep(&sf_buf_freelist, pflags, "sfbufa", 0);
+                       --sf_buf_alloc_want;
+                       if (error)
+                               goto done;
+               } else {
+                       /*
+                        * We may have to do delayed removals of referenced
+                        * sf_buf's here in addition to locating an sf_buf
+                        * to reuse; either way the sf_buf comes off the
+                        * freelist.
+                        *
+                        * We are finished when we find an sf_buf with a
+                        * refcnt of 0.  We theoretically do not have to
+                        * remove it from the freelist but it's a good idea
+                        * to do so to preserve LRU operation for (1) the
+                        * never-before-seen case and (2) the case where a
+                        * buffer was accidentally recycled by prior cached uses.
+                        */
+                       KKASSERT(sf->flags & SFBA_ONFREEQ);
+                       TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
+                       sf->flags &= ~SFBA_ONFREEQ;
+                       if (sf->refcnt == 0)
+                               break;
+               }
        }
-       TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
-
        if (sf->m != NULL)      /* remove previous mapping from hash table */
                LIST_REMOVE(sf, list_entry);
        LIST_INSERT_HEAD(hash_chain, sf, list_entry);
        sf->refcnt = 1;
        sf->m = m;
-       pmap_qenter(sf->kva, &sf->m, 1);
+       if ((flags & SFBA_QUICK) && sfbuf_quick) {
+               pmap_kenter_quick(sf->kva, sf->m->phys_addr);
+               sf->cpumask = gd->gd_cpumask;
+       } else {
+               pmap_kenter(sf->kva, sf->m->phys_addr);
+               sf->cpumask = (cpumask_t)-1;
+       }
 done:
-       splx(s);
+       crit_exit();
        return (sf);
 }
 
@@ -143,7 +198,9 @@ sf_buf_ref(struct sf_buf *sf)
 
 /*
  * Lose a reference to an sf_buf. When none left, detach mapped page
- * and release resources back to the system.
+ * and release resources back to the system.  Note that the sf_buf's
+ * removal from the freelist is delayed, so it may in fact already be
+ * on the free list.  This is the optimal (and most likely) scenario.
  *
  * Must be called at splimp.
  */
@@ -153,9 +210,10 @@ sf_buf_free(struct sf_buf *sf)
        if (sf->refcnt == 0)
                panic("sf_buf_free: freeing free sf_buf");
        sf->refcnt--;
-       if (sf->refcnt == 0) {
+       if (sf->refcnt == 0 && (sf->flags & SFBA_ONFREEQ) == 0) {
                KKASSERT(sf->aux1 == 0 && sf->aux2 == 0);
                TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
+               sf->flags |= SFBA_ONFREEQ;
                if (sf_buf_alloc_want > 0)
                        wakeup_one(&sf_buf_freelist);
        }
diff --git a/sys/kern/kern_xio.c b/sys/kern/kern_xio.c
index 86c47c0..431b315 100644
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/kern/kern_xio.c,v 1.2 2004/03/31 22:08:32 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_xio.c,v 1.3 2004/04/01 17:58:02 dillon Exp $
  */
 /*
  * Kernel XIO interface.  An initialized XIO is basically a collection of
@@ -235,7 +235,7 @@ xio_copy_xtou(xio_t xio, void *uptr, int bytes)
     error = 0;
     for (i = xio->xio_offset >> PAGE_SHIFT; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
-       sf = sf_buf_alloc(m);
+       sf = sf_buf_alloc(m, SFBA_QUICK);
        error = copyout((char *)sf_buf_kva(sf) + offset, uptr, n);
        sf_buf_free(sf);
        if (error)
@@ -279,7 +279,7 @@ xio_copy_xtok(xio_t xio, void *kptr, int bytes)
     error = 0;
     for (i = xio->xio_offset >> PAGE_SHIFT; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
-       sf = sf_buf_alloc(m);
+       sf = sf_buf_alloc(m, SFBA_QUICK);
        bcopy((char *)sf_buf_kva(sf) + offset, kptr, n);
        sf_buf_free(sf);
        bytes -= n;
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index f61df8a..6c75475 100644
@@ -17,7 +17,7 @@
  *    are met.
  *
  * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
- * $DragonFly: src/sys/kern/sys_pipe.c,v 1.16 2004/03/30 19:14:11 dillon Exp $
+ * $DragonFly: src/sys/kern/sys_pipe.c,v 1.17 2004/04/01 17:58:02 dillon Exp $
  */
 
 /*
@@ -180,7 +180,6 @@ static __inline void pipeunlock (struct pipe *cpipe);
 static __inline void pipeselwakeup (struct pipe *cpipe);
 #ifndef PIPE_NODIRECT
 static int pipe_build_write_buffer (struct pipe *wpipe, struct uio *uio);
-static void pipe_destroy_write_buffer (struct pipe *wpipe);
 static int pipe_direct_write (struct pipe *wpipe, struct uio *uio);
 static void pipe_clone_write_buffer (struct pipe *wpipe);
 #endif
@@ -435,20 +434,20 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred,
                        nread += size;
 #ifndef PIPE_NODIRECT
                /*
-                * Direct copy, bypassing a kernel buffer.
+                * Direct copy, bypassing a kernel buffer.  We cannot mess
+                * with the direct-write buffer until PIPE_DIRECTIP is
+                * cleared.  In order to prevent the pipe_write code from
+                * racing itself in direct_write, we set DIRECTIP when we
+                * clear DIRECTW after we have exhausted the buffer.
                 */
-               } else if ((size = rpipe->pipe_map.cnt) &&
-                          (rpipe->pipe_state & PIPE_DIRECTW)) {
-                       if (size > (u_int) uio->uio_resid)
-                               size = (u_int) uio->uio_resid;
-                       error = uiomove_fromphys(rpipe->pipe_map.ms,
-                                       rpipe->pipe_map.pos, size, uio);
+               } else if (rpipe->pipe_map.xio_bytes &&
+                          (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == PIPE_DIRECTW) {
+                       error = xio_uio_copy(&rpipe->pipe_map, uio, &size);
                        if (error)
                                break;
                        nread += size;
-                       rpipe->pipe_map.pos += size;
-                       rpipe->pipe_map.cnt -= size;
-                       if (rpipe->pipe_map.cnt == 0) {
+                       if (rpipe->pipe_map.xio_bytes == 0) {
+                               rpipe->pipe_state |= PIPE_DIRECTIP;
                                rpipe->pipe_state &= ~PIPE_DIRECTW;
                                wakeup(rpipe);
                        }
@@ -538,41 +537,19 @@ pipe_build_write_buffer(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
 {
+       int error;
        u_int size;
-       int i;
-       vm_offset_t addr, endaddr;
-       vm_paddr_t paddr;
 
        size = (u_int) uio->uio_iov->iov_len;
        if (size > wpipe->pipe_buffer.size)
                size = wpipe->pipe_buffer.size;
+       if (size > XIO_INTERNAL_SIZE)
+               size = XIO_INTERNAL_SIZE;
 
-       endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
-       addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
-       for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
-               vm_page_t m;
-
-               if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
-                   (paddr = pmap_kextract(addr)) == 0) {
-                       int j;
-
-                       for (j = 0; j < i; j++)
-                               vm_page_unhold(wpipe->pipe_map.ms[j]);
-                       return (EFAULT);
-               }
-
-               m = PHYS_TO_VM_PAGE(paddr);
-               vm_page_hold(m);
-               wpipe->pipe_map.ms[i] = m;
-       }
-
-       /*
-        * set up the control block
-        */
-       wpipe->pipe_map.npages = i;
-       wpipe->pipe_map.pos =
-           ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
-       wpipe->pipe_map.cnt = size;
+       error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base, 
+                               size, XIOF_READ);
+       if (error)
+               return(error);
 
        /*
         * and update the uio data
@@ -587,22 +564,6 @@ pipe_build_write_buffer(wpipe, uio)
 }
 
 /*
- * unmap and unwire the process buffer
- */
-static void
-pipe_destroy_write_buffer(wpipe)
-       struct pipe *wpipe;
-{
-       int i;
-
-       for (i = 0; i < wpipe->pipe_map.npages; i++) {
-               vm_page_unhold(wpipe->pipe_map.ms[i]);
-               wpipe->pipe_map.ms[i] = NULL;   /* sanity */
-       }
-       wpipe->pipe_map.npages = 0;
-}
-
-/*
  * In the case of a signal, the writing process might go away.  This
  * code copies the data into the circular buffer so that the source
  * pages can be freed without loss of data.
@@ -611,31 +572,17 @@ static void
 pipe_clone_write_buffer(wpipe)
        struct pipe *wpipe;
 {
-       struct uio uio;
-       struct iovec iov;
        int size;
-       int pos;
 
-       size = wpipe->pipe_map.cnt;
-       pos = wpipe->pipe_map.pos;
+       size = wpipe->pipe_map.xio_bytes;
 
        wpipe->pipe_buffer.in = size;
        wpipe->pipe_buffer.out = 0;
        wpipe->pipe_buffer.cnt = size;
-       wpipe->pipe_state &= ~PIPE_DIRECTW;
-
-       iov.iov_base = wpipe->pipe_buffer.buffer;
-       iov.iov_len = size;
-       uio.uio_iov = &iov;
-       uio.uio_iovcnt = 1;
-       uio.uio_offset = 0;
-       uio.uio_resid = size;
-       uio.uio_segflg = UIO_SYSSPACE;
-       uio.uio_rw = UIO_READ;
-       uio.uio_td = curthread;
-       uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
-
-       pipe_destroy_write_buffer(wpipe);
+       wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
+
+       xio_copy_xtok(&wpipe->pipe_map, wpipe->pipe_buffer.buffer, size);
+       xio_release(&wpipe->pipe_map);
 }
 
 /*
@@ -653,7 +600,7 @@ pipe_direct_write(wpipe, uio)
        int error;
 
 retry:
-       while (wpipe->pipe_state & PIPE_DIRECTW) {
+       while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
@@ -661,13 +608,13 @@ retry:
                wpipe->pipe_state |= PIPE_WANTW;
                error = tsleep(wpipe, PCATCH, "pipdww", 0);
                if (error)
-                       goto error1;
+                       goto error2;
                if (wpipe->pipe_state & PIPE_EOF) {
                        error = EPIPE;
-                       goto error1;
+                       goto error2;
                }
        }
-       wpipe->pipe_map.cnt = 0;        /* transfer not ready yet */
+       KKASSERT(wpipe->pipe_map.xio_bytes == 0);
        if (wpipe->pipe_buffer.cnt > 0) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
@@ -677,10 +624,10 @@ retry:
                wpipe->pipe_state |= PIPE_WANTW;
                error = tsleep(wpipe, PCATCH, "pipdwc", 0);
                if (error)
-                       goto error1;
+                       goto error2;
                if (wpipe->pipe_state & PIPE_EOF) {
                        error = EPIPE;
-                       goto error1;
+                       goto error2;
                }
                goto retry;
        }
@@ -688,12 +635,11 @@ retry:
        /*
         * Build our direct-write buffer
         */
-       wpipe->pipe_state |= PIPE_DIRECTW;
+       wpipe->pipe_state |= PIPE_DIRECTW | PIPE_DIRECTIP;
        error = pipe_build_write_buffer(wpipe, uio);
-       if (error) {
-               wpipe->pipe_state &= ~PIPE_DIRECTW;
+       if (error)
                goto error1;
-       }
+       wpipe->pipe_state &= ~PIPE_DIRECTIP;
 
        /*
         * Wait until the receiver has snarfed the data.  Since we are likely
@@ -704,7 +650,7 @@ retry:
        while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
                if (wpipe->pipe_state & PIPE_EOF) {
                        pipelock(wpipe, 0);
-                       pipe_destroy_write_buffer(wpipe);
+                       xio_release(&wpipe->pipe_map);
                        pipeunlock(wpipe);
                        pipeselwakeup(wpipe);
                        error = EPIPE;
@@ -717,7 +663,6 @@ retry:
                pipeselwakeup(wpipe);
                error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0);
        }
-
        pipelock(wpipe,0);
        if (wpipe->pipe_state & PIPE_DIRECTW) {
                /*
@@ -725,13 +670,26 @@ retry:
                 * the process that might be going away.
                 */
                pipe_clone_write_buffer(wpipe);
+               KKASSERT((wpipe->pipe_state & PIPE_DIRECTIP) == 0);
        } else {
-               pipe_destroy_write_buffer(wpipe);
+               KKASSERT(wpipe->pipe_state & PIPE_DIRECTIP);
+               xio_release(&wpipe->pipe_map);
+               wpipe->pipe_state &= ~PIPE_DIRECTIP;
        }
        pipeunlock(wpipe);
        return (error);
 
+       /*
+        * Direct-write error, clear the direct write flags.
+        */
 error1:
+       wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
+       /* fallthrough */
+
+       /*
+        * General error, wakeup the other side if it happens to be sleeping.
+        */
+error2:
        wakeup(wpipe);
        return (error);
 }
@@ -762,7 +720,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
         */
        if ((uio->uio_resid > PIPE_SIZE) &&
                (pipe_nbig < pipe_maxbig) &&
-               (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
+               (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == 0 &&
                (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
                (wpipe->pipe_buffer.cnt == 0)) {
 
@@ -821,7 +779,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
                 * reader goes away.
                 */
        retrywrite:
-               while (wpipe->pipe_state & PIPE_DIRECTW) {
+               while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
                        if (wpipe->pipe_state & PIPE_WANTR) {
                                wpipe->pipe_state &= ~PIPE_WANTR;
                                wakeup(wpipe);
@@ -857,7 +815,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
                                 * It is possible for a direct write to
                                 * slip in on us... handle it here...
                                 */
-                               if (wpipe->pipe_state & PIPE_DIRECTW) {
+                               if (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
                                        pipeunlock(wpipe);
                                        goto retrywrite;
                                }
@@ -1031,10 +989,11 @@ pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td)
                return (0);
 
        case FIONREAD:
-               if (mpipe->pipe_state & PIPE_DIRECTW)
-                       *(int *)data = mpipe->pipe_map.cnt;
-               else
+               if (mpipe->pipe_state & PIPE_DIRECTW) {
+                       *(int *)data = mpipe->pipe_map.xio_bytes;
+               } else {
                        *(int *)data = mpipe->pipe_buffer.cnt;
+               }
                return (0);
 
        case FIOSETOWN:
@@ -1144,9 +1103,9 @@ pipe_free_kmem(struct pipe *cpipe)
                cpipe->pipe_buffer.object = NULL;
        }
 #ifndef PIPE_NODIRECT
-       cpipe->pipe_map.cnt = 0;
-       cpipe->pipe_map.pos = 0;
-       cpipe->pipe_map.npages = 0;
+       KKASSERT(cpipe->pipe_map.xio_bytes == 0 &&
+               cpipe->pipe_map.xio_offset == 0 &&
+               cpipe->pipe_map.xio_npages == 0);
 #endif
 }
 
@@ -1196,12 +1155,11 @@ pipeclose(struct pipe *cpipe)
                pipe_free_kmem(cpipe);
                free(cpipe, M_PIPE);
        } else {
-               KKASSERT(cpipe->pipe_map.npages == 0);
-
+               KKASSERT(cpipe->pipe_map.xio_npages == 0 &&
+                       cpipe->pipe_map.xio_bytes == 0 &&
+                       cpipe->pipe_map.xio_offset == 0);
                cpipe->pipe_state = 0;
                cpipe->pipe_busy = 0;
-               cpipe->pipe_map.cnt = 0;
-               cpipe->pipe_map.pos = 0;
                cpipe->pipe_peer = gd->gd_pipeq;
                gd->gd_pipeq = cpipe;
                ++gd->gd_pipeqcount;
@@ -1251,7 +1209,7 @@ filt_piperead(struct knote *kn, long hint)
 
        kn->kn_data = rpipe->pipe_buffer.cnt;
        if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
-               kn->kn_data = rpipe->pipe_map.cnt;
+               kn->kn_data = rpipe->pipe_map.xio_bytes;
 
        if ((rpipe->pipe_state & PIPE_EOF) ||
            (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a6b4aa8..9bbeded 100644
@@ -35,7 +35,7 @@
  *
  *     @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
  * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $
- * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.29 2004/03/29 15:46:18 dillon Exp $
+ * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.30 2004/04/01 17:58:02 dillon Exp $
  */
 
 #include "opt_ktrace.h"
@@ -1557,7 +1557,7 @@ retry_lookup:
                 * Get a sendfile buf. We usually wait as long as necessary,
                 * but this wait can be interrupted.
                 */
-               if ((sf = sf_buf_alloc(pg)) == NULL) {
+               if ((sf = sf_buf_alloc(pg, SFBA_PCATCH)) == NULL) {
                        s = splvm();
                        vm_page_unwire(pg, 0);
                        if (pg->wire_count == 0 && pg->object == NULL)
diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c
index 53732a3..4deb124 100644
@@ -40,7 +40,7 @@
  *
  *     from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.32 2004/03/01 06:33:16 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.33 2004/04/01 17:58:00 dillon Exp $
  */
 
 /*
@@ -689,7 +689,7 @@ pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot)
  * note that in order for the mapping to take effect -- you
  * should do a invltlb after doing the pmap_kenter...
  */
-PMAP_INLINE void 
+void 
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
        unsigned *pte;
@@ -704,10 +704,38 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
        pmap_inval_flush(&info);
 }
 
+void
+pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
+{
+       unsigned *pte;
+       unsigned npte;
+
+       npte = pa | PG_RW | PG_V | pgeflag;
+       pte = (unsigned *)vtopte(va);
+       *pte = npte;
+       cpu_invlpg((void *)va);
+}
+
+void
+pmap_kenter_sync(vm_offset_t va)
+{
+       pmap_inval_info info;
+
+       pmap_inval_init(&info);
+       pmap_inval_add(&info, kernel_pmap, va);
+       pmap_inval_flush(&info);
+}
+
+void
+pmap_kenter_sync_quick(vm_offset_t va)
+{
+       cpu_invlpg((void *)va);
+}
+
 /*
  * remove a page from the kernel pagetables
  */
-PMAP_INLINE void
+void
 pmap_kremove(vm_offset_t va)
 {
        unsigned *pte;
@@ -720,6 +748,15 @@ pmap_kremove(vm_offset_t va)
        pmap_inval_flush(&info);
 }
 
+void
+pmap_kremove_quick(vm_offset_t va)
+{
+       unsigned *pte;
+       pte = (unsigned *)vtopte(va);
+       *pte = 0;
+       cpu_invlpg((void *)va);
+}
+
 /*
  *     Used to map a range of physical addresses into kernel
  *     virtual address space.
@@ -870,7 +907,7 @@ pmap_swapout_proc(struct proc *p)
                        panic("pmap_swapout_proc: upage already missing???");
                vm_page_dirty(m);
                vm_page_unwire(m, 0);
-               pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
+               pmap_kremove((vm_offset_t)p->p_addr + (PAGE_SIZE * i));
        }
 #endif
 }
@@ -891,7 +928,7 @@ pmap_swapin_proc(struct proc *p)
 
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
-               pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
+               pmap_kenter((vm_offset_t)p->p_addr + (i * PAGE_SIZE),
                        VM_PAGE_TO_PHYS(m));
 
                if (m->valid != VM_PAGE_BITS_ALL) {
@@ -1001,7 +1038,7 @@ pmap_pinit0(struct pmap *pmap)
 {
        pmap->pm_pdir =
                (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
-       pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
+       pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
        pmap->pm_count = 1;
        pmap->pm_active = 0;
        pmap->pm_ptphint = NULL;
@@ -1046,7 +1083,7 @@ pmap_pinit(struct pmap *pmap)
        vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
        ptdpg->valid = VM_PAGE_BITS_ALL;
 
-       pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
+       pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
        if ((ptdpg->flags & PG_ZERO) == 0)
                bzero(pmap->pm_pdir, PAGE_SIZE);
 
@@ -1107,7 +1144,7 @@ pmap_release_free_page(struct pmap *pmap, vm_page_t p)
                bzero(pde + KPTDI, nkpt * PTESIZE);
                pde[MPPTDI] = 0;
                pde[APTDPTDI] = 0;
-               pmap_kremove((vm_offset_t) pmap->pm_pdir);
+               pmap_kremove((vm_offset_t)pmap->pm_pdir);
        }
 
        if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
diff --git a/sys/platform/pc32/i386/uio_machdep.c b/sys/platform/pc32/i386/uio_machdep.c
index e97a67c..e3cb5d5 100644
@@ -38,7 +38,7 @@
  *
  * @(#)kern_subr.c     8.3 (Berkeley) 1/21/94
  * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
- * $DragonFly: src/sys/platform/pc32/i386/Attic/uio_machdep.c,v 1.2 2004/03/29 15:46:15 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/Attic/uio_machdep.c,v 1.3 2004/04/01 17:58:00 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -93,7 +93,7 @@ uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, int n, struct uio *uio)
                page_offset = offset & PAGE_MASK;
                cnt = min(cnt, PAGE_SIZE - page_offset);
                m = ma[offset >> PAGE_SHIFT];
-               sf = sf_buf_alloc(m);
+               sf = sf_buf_alloc(m, SFBA_QUICK);
                cp = (char *)sf_buf_kva(sf) + page_offset;
                switch (uio->uio_segflg) {
                case UIO_USERSPACE:
diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h
index 7fdcceb..22daa5b 100644
@@ -19,7 +19,7 @@
  *    are met.
  *
  * $FreeBSD: src/sys/sys/pipe.h,v 1.16 1999/12/29 04:24:45 peter Exp $
- * $DragonFly: src/sys/sys/pipe.h,v 1.4 2004/03/28 08:25:46 dillon Exp $
+ * $DragonFly: src/sys/sys/pipe.h,v 1.5 2004/04/01 17:58:06 dillon Exp $
  */
 
 #ifndef _SYS_PIPE_H_
 #ifndef _KERNEL
 #include <sys/time.h>                  /* for struct timespec */
 #include <sys/select.h>                        /* for struct selinfo */
-#include <vm/vm.h>                     /* for vm_page_t */
 #include <machine/param.h>             /* for PAGE_SIZE */
 #endif
 
+#if !defined(_SYS_XIO_H_)
+#include <sys/xio.h>                   /* for struct xio */
+#endif
+
 /*
  * Pipe buffer size, keep moderate in value, pipes take kva space.
  */
@@ -51,8 +54,6 @@
 #define PIPE_MINDIRECT 8192
 #endif
 
-#define PIPENPAGES     (BIG_PIPE_SIZE / PAGE_SIZE + 1)
-
 /*
  * Pipe buffer information.
  * Separate in, out, cnt are used to simplify calculations.
@@ -68,28 +69,19 @@ struct pipebuf {
 };
 
 /*
- * Information to support direct transfers between processes for pipes.
- */
-struct pipemapping {
-       vm_size_t       cnt;            /* number of chars in buffer */
-       vm_size_t       pos;            /* current position of transfer */
-       int             npages;         /* number of pages */
-       vm_page_t       ms[PIPENPAGES]; /* pages in source process */
-};
-
-/*
  * Bits in pipe_state.
  */
-#define PIPE_ASYNC     0x004   /* Async? I/O. */
-#define PIPE_WANTR     0x008   /* Reader wants some characters. */
-#define PIPE_WANTW     0x010   /* Writer wants space to put characters. */
-#define PIPE_WANT      0x020   /* Pipe is wanted to be run-down. */
-#define PIPE_SEL       0x040   /* Pipe has a select active. */
-#define PIPE_EOF       0x080   /* Pipe is in EOF condition. */
-#define PIPE_LOCK      0x100   /* Process has exclusive access to pointers/data. */
-#define PIPE_LWANT     0x200   /* Process wants exclusive access to pointers/data. */
-#define PIPE_DIRECTW   0x400   /* Pipe direct write active. */
-#define PIPE_DIRECTOK  0x800   /* Direct mode ok. */
+#define PIPE_ASYNC     0x0004  /* Async? I/O. */
+#define PIPE_WANTR     0x0008  /* Reader wants some characters. */
+#define PIPE_WANTW     0x0010  /* Writer wants space to put characters. */
+#define PIPE_WANT      0x0020  /* Pipe is wanted to be run-down. */
+#define PIPE_SEL       0x0040  /* Pipe has a select active. */
+#define PIPE_EOF       0x0080  /* Pipe is in EOF condition. */
+#define PIPE_LOCK      0x0100  /* Process has exclusive access to pointers/data. */
+#define PIPE_LWANT     0x0200  /* Process wants exclusive access to pointers/data. */
+#define PIPE_DIRECTW   0x0400  /* Pipe direct write active. */
+#define PIPE_DIRECTOK  0x0800  /* Direct mode ok. */
+#define PIPE_DIRECTIP  0x1000  /* Direct write buffer build in progress */
 
 /*
  * Per-pipe data structure.
@@ -97,7 +89,7 @@ struct pipemapping {
  */
 struct pipe {
        struct  pipebuf pipe_buffer;    /* data storage */
-       struct  pipemapping pipe_map;   /* pipe mapping for direct I/O */
+       struct  xio pipe_map;           /* mapping for direct I/O */
        struct  selinfo pipe_sel;       /* for compat with select */
        struct  timespec pipe_atime;    /* time of last access */
        struct  timespec pipe_mtime;    /* time of last modify */
diff --git a/sys/sys/sfbuf.h b/sys/sys/sfbuf.h
index ec8293f..b525e88 100644
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/sys/sfbuf.h,v 1.3 2004/03/29 15:46:21 dillon Exp $
+ * $DragonFly: src/sys/sys/sfbuf.h,v 1.4 2004/04/01 17:58:06 dillon Exp $
  */
 
 #ifndef _SFBUF_H_
@@ -35,10 +35,19 @@ struct sf_buf {
        struct          vm_page *m;     /* currently mapped page */
        vm_offset_t     kva;            /* va of mapping */
        int             refcnt;         /* usage of this mapping */
+       int             flags;          /* global SFBA flags */
+       cpumask_t       cpumask;        /* cpu mapping synchronization */
        int             aux1;           /* auxillary counter TEMPORARY HACK */
        int             aux2;           /* auxillary counter TEMPORARY HACK */
 };
 
+/*
+ * sf_buf_alloc() flags (not all are stored in sf->flags)
+ */
+#define SFBA_QUICK     0x0001          /* sync mapping to current cpu only */
+#define SFBA_ONFREEQ   0x0002          /* on the free queue (lazy move) */
+#define SFBA_PCATCH    0x0004          /* allow interruption */
+
 static __inline vm_offset_t
 sf_buf_kva(struct sf_buf *sf)
 {
@@ -56,7 +65,7 @@ sf_buf_page(struct sf_buf *sf)
 
 extern int nsfbufs;
 
-struct sf_buf  *sf_buf_alloc(struct vm_page *);
+struct sf_buf  *sf_buf_alloc(struct vm_page *, int flags);
 void           sf_buf_free(struct sf_buf *);
 void           sf_buf_ref(struct sf_buf *);
 struct sf_buf  *sf_buf_tosf(caddr_t addr);
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index a386e0a..ddee578 100644
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/pmap.h,v 1.33.2.4 2002/03/06 22:44:24 silby Exp $
- * $DragonFly: src/sys/vm/pmap.h,v 1.9 2004/01/18 12:29:50 dillon Exp $
+ * $DragonFly: src/sys/vm/pmap.h,v 1.10 2004/04/01 17:58:08 dillon Exp $
  */
 
 /*
@@ -112,8 +112,6 @@ void                 pmap_growkernel (vm_offset_t);
 void            pmap_init (vm_paddr_t, vm_paddr_t);
 boolean_t       pmap_is_modified (vm_page_t m);
 boolean_t       pmap_ts_referenced (vm_page_t m);
-void            pmap_kenter (vm_offset_t va, vm_paddr_t pa);
-void            pmap_kremove (vm_offset_t);
 vm_offset_t     pmap_map (vm_offset_t, vm_paddr_t, vm_paddr_t, int);
 void            pmap_object_init_pt (pmap_t pmap, vm_offset_t addr,
                    vm_object_t object, vm_pindex_t pindex, vm_offset_t size,
@@ -130,6 +128,12 @@ void                pmap_protect (pmap_t, vm_offset_t, vm_offset_t,
                    vm_prot_t);
 void            pmap_qenter (vm_offset_t, vm_page_t *, int);
 void            pmap_qremove (vm_offset_t, int);
+void            pmap_kenter (vm_offset_t, vm_paddr_t);
+void            pmap_kenter_quick (vm_offset_t, vm_paddr_t);
+void            pmap_kenter_sync (vm_offset_t);
+void            pmap_kenter_sync_quick (vm_offset_t);
+void            pmap_kremove (vm_offset_t);
+void            pmap_kremove_quick (vm_offset_t);
 void            pmap_reference (pmap_t);
 void            pmap_release (pmap_t);
 void            pmap_remove (pmap_t, vm_offset_t, vm_offset_t);