From 03aa69bddd845722e92210daed7468005f6a59d0 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 1 Mar 2005 23:35:16 +0000 Subject: [PATCH] Clean up the XIO API and structure. XIO no longer tries to 'track' partial copies into or out of an XIO. It no longer adjusts xio_offset or xio_bytes once they have been initialized. Instead, a relative offset is now passed to API calls to handle partial copies. This makes the API a lot less confusing and makes the XIO structure a lot more flexible, shareable, and more suitable for use by higher level entities (buffer cache, pipe code, upcoming MSFBUF work, etc). --- sys/kern/kern_xio.c | 86 ++++++++++++++++++++++---------------------- sys/kern/lwkt_caps.c | 4 +-- sys/kern/sys_pipe.c | 59 ++++++++++++++++++------------ sys/sys/pipe.h | 6 +++- sys/sys/xio.h | 51 ++++++++++++++++++++------ 5 files changed, 125 insertions(+), 81 deletions(-) diff --git a/sys/kern/kern_xio.c b/sys/kern/kern_xio.c index 94ddf47dde..f976f2e7fc 100644 --- a/sys/kern/kern_xio.c +++ b/sys/kern/kern_xio.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/kern/kern_xio.c,v 1.7 2004/07/16 05:51:10 dillon Exp $ + * $DragonFly: src/sys/kern/kern_xio.c,v 1.8 2005/03/01 23:35:13 dillon Exp $ */ /* * Kernel XIO interface. An initialized XIO is basically a collection of @@ -86,10 +86,8 @@ xio_init(xio_t xio) /* * Initialize an XIO given a userspace buffer. 0 is returned on success, * an error code on failure. The actual number of bytes that could be - * accomodated in the XIO will be stored in xio_bytes. - * - * Note that you cannot legally accessed a previously cached linmap with - * a newly initialized xio until after calling xio_linmap(). + * accomodated in the XIO will be stored in xio_bytes and the page offset + * will be stored in xio_offset. */ int xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags) @@ -148,12 +146,8 @@ xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags) /* * Initialize an XIO given a kernelspace buffer. 0 is returned on success, * an error code on failure. The actual number of bytes that could be - * accomodated in the XIO will be stored in xio_bytes. - * - * vmprot is usually either VM_PROT_READ or VM_PROT_WRITE. - * - * Note that you cannot legally accessed a previously cached linmap with - * a newly initialized xio until after calling xio_linmap(). + * accomodated in the XIO will be stored in xio_bytes and the page offset + * will be stored in xio_offset. */ int xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes) @@ -202,11 +196,7 @@ xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes) /* * Cleanup an XIO so it can be destroyed. The pages associated with the - * XIO are released. If a linear mapping buffer is active, it will be - * unlocked but the mappings will be left intact for optimal reconstitution - * in a later xio_linmap() call. - * - * Note that you cannot legally accessed the linmap on a released XIO. + * XIO are released. */ void xio_release(xio_t xio) @@ -221,10 +211,6 @@ xio_release(xio_t xio) vm_page_unhold(m); } splx(s); - if (xio->xio_flags & XIOF_LINMAP) { - xio->xio_flags &= ~XIOF_LINMAP; - /* XXX */ - } xio->xio_offset = 0; xio->xio_npages = 0; xio->xio_bytes = 0; @@ -233,39 +219,46 @@ xio_release(xio_t xio) /* * Copy data between an XIO and a UIO. If the UIO represents userspace it - * must be relative to the current context. Both the UIO and the XIO are - * modified, but the XIO's pages are not released when exhausted. 
+ * must be relative to the current context. + * + * uoffset is the abstracted starting offset in the XIO, not the actual + * offset, and usually starts at 0. + * + * The XIO is not modified. The UIO is updated to reflect the copy. * * UIO_READ xio -> uio * UIO_WRITE uio -> xio */ int -xio_uio_copy(xio_t xio, struct uio *uio, int *sizep) +xio_uio_copy(xio_t xio, int uoffset, struct uio *uio, int *sizep) { int error; int bytes; - if ((bytes = xio->xio_bytes) > uio->uio_resid) + bytes = xio->xio_bytes - uoffset; + if (bytes > uio->uio_resid) bytes = uio->uio_resid; - error = uiomove_fromphys(xio->xio_pages, xio->xio_offset, bytes, uio); - if (error == 0) { - xio->xio_bytes -= bytes; - xio->xio_offset += bytes; + KKASSERT(bytes >= 0); + error = uiomove_fromphys(xio->xio_pages, xio->xio_offset + uoffset, + bytes, uio); + if (error == 0) *sizep = bytes; - } else { + else *sizep = 0; - } return(error); } /* * Copy the specified number of bytes from the xio to a userland - * buffer. Return an error code or 0 on success. + * buffer. Return an error code or 0 on success. * - * The XIO is modified, but the XIO's pages are not released when exhausted. + * uoffset is the abstracted starting offset in the XIO, not the actual + * offset, and usually starts at 0. + * + * The XIO is not modified. */ int -xio_copy_xtou(xio_t xio, void *uptr, int bytes) +xio_copy_xtou(xio_t xio, int uoffset, void *uptr, int bytes) { int i; int n; @@ -277,12 +270,15 @@ xio_copy_xtou(xio_t xio, void *uptr, int bytes) if (bytes > xio->xio_bytes) return(EFAULT); - offset = xio->xio_offset & PAGE_MASK; + offset = (xio->xio_offset + uoffset) & PAGE_MASK; if ((n = PAGE_SIZE - offset) > bytes) n = bytes; error = 0; - for (i = xio->xio_offset >> PAGE_SHIFT; i < xio->xio_npages; ++i) { + for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT; + i < xio->xio_npages; + ++i + ) { m = xio->xio_pages[i]; sf = sf_buf_alloc(m, SFBA_QUICK); error = copyout((char *)sf_buf_kva(sf) + offset, uptr, n); @@ -290,8 +286,6 @@ xio_copy_xtou(xio_t xio, void *uptr, int bytes) if (error) break; bytes -= n; - xio->xio_bytes -= n; - xio->xio_offset += n; uptr = (char *)uptr + n; if (bytes == 0) break; @@ -306,10 +300,13 @@ xio_copy_xtou(xio_t xio, void *uptr, int bytes) * Copy the specified number of bytes from the xio to a kernel * buffer. Return an error code or 0 on success. * - * The XIO is modified, but the XIO's pages are not released when exhausted. + * uoffset is the abstracted starting offset in the XIO, not the actual + * offset, and usually starts at 0. + * + * The XIO is not modified. 
*/ int -xio_copy_xtok(xio_t xio, void *kptr, int bytes) +xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes) { int i; int n; @@ -318,22 +315,23 @@ xio_copy_xtok(xio_t xio, void *kptr, int bytes) vm_page_t m; struct sf_buf *sf; - if (bytes > xio->xio_bytes) + if (bytes + uoffset > xio->xio_bytes) return(EFAULT); - offset = xio->xio_offset & PAGE_MASK; + offset = (xio->xio_offset + uoffset) & PAGE_MASK; if ((n = PAGE_SIZE - offset) > bytes) n = bytes; error = 0; - for (i = xio->xio_offset >> PAGE_SHIFT; i < xio->xio_npages; ++i) { + for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT; + i < xio->xio_npages; + ++i + ) { m = xio->xio_pages[i]; sf = sf_buf_alloc(m, SFBA_QUICK); bcopy((char *)sf_buf_kva(sf) + offset, kptr, n); sf_buf_free(sf); bytes -= n; - xio->xio_bytes -= n; - xio->xio_offset += n; kptr = (char *)kptr + n; if (bytes == 0) break; diff --git a/sys/kern/lwkt_caps.c b/sys/kern/lwkt_caps.c index 136dc3e59e..648247ce2c 100644 --- a/sys/kern/lwkt_caps.c +++ b/sys/kern/lwkt_caps.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/kern/lwkt_caps.c,v 1.5 2004/07/16 05:51:10 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_caps.c,v 1.6 2005/03/01 23:35:13 dillon Exp $ */ /* @@ -889,7 +889,7 @@ caps_process_msg(caps_kinfo_t caps, caps_kmsg_t msg, struct caps_sys_get_args *u caps_dequeue_msg(caps, msg); if (msg->km_xio.xio_bytes != 0) { - error = xio_copy_xtou(&msg->km_xio, uap->msg, + error = xio_copy_xtou(&msg->km_xio, 0, uap->msg, min(msg->km_xio.xio_bytes, uap->maxsize)); if (error) { if (msg->km_mcaps->ci_td && msg->km_mcaps->ci_td->td_proc) { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index e5971f86bf..ea777a7d46 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -17,7 +17,7 @@ * are met. 
* * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $ - * $DragonFly: src/sys/kern/sys_pipe.c,v 1.25 2004/11/12 00:09:24 dillon Exp $ + * $DragonFly: src/sys/kern/sys_pipe.c,v 1.26 2005/03/01 23:35:14 dillon Exp $ */ /* @@ -473,22 +473,23 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, /* * Direct copy using source-side kva mapping */ - size = rpipe->pipe_map.xio_bytes; + size = rpipe->pipe_map.xio_bytes - + rpipe->pipe_buffer.out; if (size > (u_int)uio->uio_resid) size = (u_int)uio->uio_resid; - va = (caddr_t)rpipe->pipe_kva + rpipe->pipe_map.xio_offset; + va = (caddr_t)rpipe->pipe_kva + + xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out); error = uiomove(va, size, uio); if (error) break; nread += size; - rpipe->pipe_map.xio_offset += size; - rpipe->pipe_map.xio_bytes -= size; - if (rpipe->pipe_map.xio_bytes == 0) { + rpipe->pipe_buffer.out += size; + if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) { rpipe->pipe_state |= PIPE_DIRECTIP; rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); } - } else if (rpipe->pipe_map.xio_bytes && + } else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes && rpipe->pipe_kva && rpipe->pipe_feature == PIPE_SFBUF2 && (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) @@ -507,23 +508,22 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, pmap_qenter2(rpipe->pipe_kva, rpipe->pipe_map.xio_pages, rpipe->pipe_map.xio_npages, &rpipe->pipe_kvamask); - size = rpipe->pipe_map.xio_bytes; + size = rpipe->pipe_map.xio_bytes - + rpipe->pipe_buffer.out; if (size > (u_int)uio->uio_resid) size = (u_int)uio->uio_resid; - va = (caddr_t)rpipe->pipe_kva + - rpipe->pipe_map.xio_offset; + va = (caddr_t)rpipe->pipe_kva + xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out); error = uiomove(va, size, uio); if (error) break; nread += size; - rpipe->pipe_map.xio_offset += size; - rpipe->pipe_map.xio_bytes -= size; - if (rpipe->pipe_map.xio_bytes == 0) { + rpipe->pipe_buffer.out += size; + if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) { rpipe->pipe_state |= PIPE_DIRECTIP; rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); } - } else if (rpipe->pipe_map.xio_bytes && + } else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes && rpipe->pipe_feature == PIPE_SFBUF1 && (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == PIPE_DIRECTW @@ -536,11 +536,12 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, * direct_write, we set DIRECTIP when we clear * DIRECTW after we have exhausted the buffer. */ - error = xio_uio_copy(&rpipe->pipe_map, uio, &size); + error = xio_uio_copy(&rpipe->pipe_map, rpipe->pipe_buffer.out, uio, &size); if (error) break; nread += size; - if (rpipe->pipe_map.xio_bytes == 0) { + rpipe->pipe_buffer.out += size; + if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) { rpipe->pipe_state |= PIPE_DIRECTIP; rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); @@ -639,6 +640,7 @@ pipe_build_write_buffer(wpipe, uio) error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base, size, XIOF_READ); + wpipe->pipe_buffer.out = 0; if (error) return(error); @@ -681,14 +683,20 @@ pipe_build_write_buffer(wpipe, uio) * In the case of a signal, the writing process might go away. This * code copies the data into the circular buffer so that the source * pages can be freed without loss of data. + * + * Note that in direct mode pipe_buffer.out is used to track the + * XIO offset. 
We are converting the direct mode into buffered mode + * which changes the meaning of pipe_buffer.out. */ static void pipe_clone_write_buffer(wpipe) struct pipe *wpipe; { int size; + int offset; - size = wpipe->pipe_map.xio_bytes; + offset = wpipe->pipe_buffer.out; + size = wpipe->pipe_map.xio_bytes - offset; KKASSERT(size <= wpipe->pipe_buffer.size); @@ -697,7 +705,7 @@ pipe_clone_write_buffer(wpipe) wpipe->pipe_buffer.cnt = size; wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP); - xio_copy_xtok(&wpipe->pipe_map, wpipe->pipe_buffer.buffer, size); + xio_copy_xtok(&wpipe->pipe_map, offset, wpipe->pipe_buffer.buffer, size); xio_release(&wpipe->pipe_map); if (wpipe->pipe_kva) { pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES); @@ -1129,7 +1137,8 @@ pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td) case FIONREAD: if (mpipe->pipe_state & PIPE_DIRECTW) { - *(int *)data = mpipe->pipe_map.xio_bytes; + *(int *)data = mpipe->pipe_map.xio_bytes - + mpipe->pipe_buffer.out; } else { *(int *)data = mpipe->pipe_buffer.cnt; } @@ -1204,8 +1213,10 @@ pipe_stat(struct file *fp, struct stat *ub, struct thread *td) ub->st_mode = S_IFIFO; ub->st_blksize = pipe->pipe_buffer.size; ub->st_size = pipe->pipe_buffer.cnt; - if (ub->st_size == 0 && (pipe->pipe_state & PIPE_DIRECTW)) - ub->st_size = pipe->pipe_map.xio_bytes; + if (ub->st_size == 0 && (pipe->pipe_state & PIPE_DIRECTW)) { + ub->st_size = pipe->pipe_map.xio_bytes - + pipe->pipe_buffer.out; + } ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; ub->st_atimespec = pipe->pipe_atime; ub->st_mtimespec = pipe->pipe_mtime; @@ -1355,8 +1366,10 @@ filt_piperead(struct knote *kn, long hint) struct pipe *wpipe = rpipe->pipe_peer; kn->kn_data = rpipe->pipe_buffer.cnt; - if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) - kn->kn_data = rpipe->pipe_map.xio_bytes; + if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) { + kn->kn_data = rpipe->pipe_map.xio_bytes - + rpipe->pipe_buffer.out; + } if ((rpipe->pipe_state & PIPE_EOF) || (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h index a15869f535..344f574ec6 100644 --- a/sys/sys/pipe.h +++ b/sys/sys/pipe.h @@ -19,7 +19,7 @@ * are met. * * $FreeBSD: src/sys/sys/pipe.h,v 1.16 1999/12/29 04:24:45 peter Exp $ - * $DragonFly: src/sys/sys/pipe.h,v 1.6 2004/05/01 18:16:44 dillon Exp $ + * $DragonFly: src/sys/sys/pipe.h,v 1.7 2005/03/01 23:35:16 dillon Exp $ */ #ifndef _SYS_PIPE_H_ @@ -87,6 +87,10 @@ enum pipe_feature { PIPE_COPY, PIPE_KMEM, PIPE_SFBUF1, PIPE_SFBUF2 }; /* * Per-pipe data structure. * Two of these are linked together to produce bi-directional pipes. + * + * NOTE: pipe_buffer.out has the dual purpose of tracking the copy offset + * for both the direct write case (with the rest of pipe_buffer) and the + * buffered write case (with pipe_map). */ struct pipe { struct pipebuf pipe_buffer; /* data storage */ diff --git a/sys/sys/xio.h b/sys/sys/xio.h index c5ad68dc54..8f02b1fd02 100644 --- a/sys/sys/xio.h +++ b/sys/sys/xio.h @@ -31,16 +31,21 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/xio.h,v 1.5 2004/07/16 05:51:57 dillon Exp $ + * $DragonFly: src/sys/sys/xio.h,v 1.6 2005/03/01 23:35:16 dillon Exp $ */ /* - * The XIO structure is intended to replace UIO for messaged I/O operations - * within the kernel. 
The originator of the transaction must supply an XIO - * structure containing a list of appropriate held vm_page's representing - * the buffer. The target of the transaction will generally map the - * pages using the SF_BUF facility, complete the operation, and reply the - * message. + * An XIO holds a platform-agnostic page list representing a data set for + * the purposes of I/O, mapping (SFBUF/MSFBUF), or other operations. The + * representation of the data set is byte aligned. xio_offset and xio_bytes + * specifies the precise byte-ranged block within the page list being + * represented. + * + * XIOs do not track an ongoing I/O, they simply represent a block of data. + * For this reason most XIO API functions have a 'uoffset' argument which + * the caller may use to index within the represented dataset. This index + * is relative to the represented dataset, NOT to the beginning of the + * first page. */ #ifndef _SYS_XIO_H_ #define _SYS_XIO_H_ @@ -70,7 +75,6 @@ typedef struct xio *xio_t; #define XIOF_READ 0x0001 #define XIOF_WRITE 0x0002 -#define XIOF_LINMAP 0x0004 #endif @@ -80,9 +84,34 @@ void xio_init(xio_t xio); int xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int vmprot); int xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes); void xio_release(xio_t xio); -int xio_uio_copy(xio_t xio, struct uio *uio, int *sizep); -int xio_copy_xtou(xio_t xio, void *uptr, int bytes); -int xio_copy_xtok(xio_t xio, void *kptr, int bytes); +int xio_uio_copy(xio_t xio, int uoffset, struct uio *uio, int *sizep); +int xio_copy_xtou(xio_t xio, int uoffset, void *uptr, int bytes); +int xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes); + +/* + * XIOs are not modified by copy operations, the caller must track the + * offset itself. This routine will return the number of bytes remaining + * in an XIO's buffer given an offset relative to the buffer used to + * originally construct the XIO. + */ +static __inline +int +xio_remaining(xio_t xio, int uoffset) +{ + return(xio->xio_bytes - uoffset); +} + +/* + * XIOs do not map data but if the page list WERE mapped, this routine will + * return the actual KVA offset given a user offset relative to the original + * buffer used to construct the XIO. + */ +static __inline +int +xio_kvaoffset(xio_t xio, int uoffset) +{ + return(xio->xio_offset + uoffset); +} #endif /* _KERNEL */ -- 2.41.0
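
For reference, a minimal sketch of the calling convention under the revised
API: the XIO is initialized once, none of the copy routines modify it, and
the caller carries a relative offset ('uoffset') across partial copies.
The helper drain_xio_in_chunks() and its arguments are hypothetical and not
part of this patch; only the xio_* functions and their signatures are taken
from the diff above.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/xio.h>

/*
 * Copy the kernel buffer described by (kbase, kbytes) into 'dst' in
 * 'chunk'-sized pieces.  The XIO itself is never adjusted; 'offset' is
 * the caller-maintained relative offset passed as 'uoffset'.
 */
static int
drain_xio_in_chunks(void *kbase, size_t kbytes, void *dst, int chunk)
{
    struct xio xio;
    int offset = 0;
    int error;

    error = xio_init_kbuf(&xio, kbase, kbytes);
    if (error)
        return(error);
    while (xio_remaining(&xio, offset) > 0) {
        int n = xio_remaining(&xio, offset);

        if (n > chunk)
            n = chunk;
        error = xio_copy_xtok(&xio, offset, (char *)dst + offset, n);
        if (error)
            break;
        offset += n;            /* only the caller's offset advances */
    }
    xio_release(&xio);
    return(error);
}

Because xio_bytes and xio_offset stay fixed after initialization, several
consumers can walk the same XIO independently, each with its own offset,
which is what makes the structure shareable by the pipe code and the
upcoming MSFBUF work.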
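
The pipe changes follow the same pattern: rather than decrementing
xio_bytes as data is consumed, sys_pipe.c now keeps the consumed-byte count
in pipe_buffer.out and hands it to the copy routines as the relative
offset.  A simplified illustration of the read-side step (assumed for
clarity; the real code in pipe_read() also manages the kva mapping, the
PIPE_DIRECTW/PIPE_DIRECTIP state flags, and wakeups) might look like the
following, using the same headers as the sketch above plus <sys/uio.h>:

/*
 * Drain part of a direct-write window into a uio.  '*outp' plays the
 * role of pipe_buffer.out: it is the only thing that advances; the XIO
 * describing the writer's pages is left untouched.
 */
static int
read_direct_window(struct xio *xio, int *outp, struct uio *uio)
{
    int size;
    int error;

    error = xio_uio_copy(xio, *outp, uio, &size);
    if (error == 0) {
        *outp += size;
        if (*outp == xio->xio_bytes) {
            /* data set exhausted; caller clears PIPE_DIRECTW here */
        }
    }
    return(error);
}

Because the XIO is left intact, pipe_clone_write_buffer() can later copy
whatever the reader has not yet consumed (xio_bytes - pipe_buffer.out bytes
starting at pipe_buffer.out) into the circular buffer with no bookkeeping
beyond that single offset.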