kernel - Introduce lightweight buffers
[dragonfly.git] / sys / kern / kern_xio.c
/*
 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_xio.c,v 1.16 2008/05/09 07:24:45 dillon Exp $
 */
/*
 * Kernel XIO interface. An initialized XIO is basically a collection of
 * appropriately held vm_page_t's. XIO buffers are vmspace agnostic and
 * can represent userspace or kernelspace buffers, and can be passed to
 * foreign threads outside of the originating vmspace. XIO buffers are
 * not mapped into KVM and thus can be manipulated and passed around with
 * very low overheads.
 *
 * The intent is for XIO to be used in the I/O path, VFS, CAPS, and other
 * places that need to pass (possibly userspace) data between threads.
 *
 * TODO: check for busy page when modifying, check writeable.
 */
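
/*
 * Usage sketch (illustrative only; assumes XIOF_READ from <sys/xio.h>):
 * one thread wraps a user buffer, after which any thread, even one in a
 * different vmspace, can consume the data through the held pages.
 *
 *      struct xio xio;
 *      char kbuf[128];
 *
 *      if (xio_init_ubuf(&xio, ubase, sizeof(kbuf), XIOF_READ) == 0) {
 *              (pass &xio to a foreign thread, which can then run:)
 *              xio_copy_xtok(&xio, 0, kbuf, sizeof(kbuf));
 *              xio_release(&xio);
 *      }
 */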

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/xio.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_page2.h>

/*
 * Just do basic initialization of an empty XIO
 */
void
xio_init(xio_t xio)
{
        xio->xio_flags = 0;
        xio->xio_bytes = 0;
        xio->xio_error = 0;
        xio->xio_offset = 0;
        xio->xio_npages = 0;
        xio->xio_pages = xio->xio_internal_pages;
}

/*
 * Initialize an XIO given a userspace buffer. 0 is returned on success,
 * an error code on failure. The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
{
        vm_offset_t addr;
        vm_page_t m;
        vm_page_t m0;
        int error;
        int i;
        int n;
        int vmprot;

        addr = trunc_page((vm_offset_t)ubase);
        xio->xio_flags = flags;
        xio->xio_bytes = 0;
        xio->xio_error = 0;
        if (ubytes == 0) {
                xio->xio_offset = 0;
                xio->xio_npages = 0;
        } else {
                vmprot = (flags & XIOF_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
                xio->xio_offset = (vm_offset_t)ubase & PAGE_MASK;
                xio->xio_pages = xio->xio_internal_pages;
                if ((n = PAGE_SIZE - xio->xio_offset) > ubytes)
                        n = ubytes;
                m0 = NULL;
                for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
                        m = vm_fault_page_quick(addr, vmprot, &error);
                        if (m == NULL)
                                break;
                        xio->xio_pages[i] = m;
                        ubytes -= n;
                        xio->xio_bytes += n;
                        if ((n = ubytes) > PAGE_SIZE)
                                n = PAGE_SIZE;
                        addr += PAGE_SIZE;

                        /*
                         * Check linearity, used by syslink to memory map
                         * DMA buffers.
                         */
                        if (flags & XIOF_VMLINEAR) {
                                if (i == 0) {
                                        m0 = m;
                                } else
                                if (m->object != m0->object ||
                                    m->pindex != m0->pindex + i) {
                                        error = EINVAL;
                                        break;
                                }
                        }
                }
                xio->xio_npages = i;

                /*
                 * If a failure occurred clean out what we loaded and return
                 * EFAULT. Return 0 on success. Do not dirty the pages.
                 */
                if (i < XIO_INTERNAL_PAGES && n) {
                        xio->xio_flags &= ~XIOF_WRITE;
                        xio_release(xio);
                        xio->xio_error = EFAULT;
                }
        }
        return(xio->xio_error);
}
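
/*
 * Usage sketch (illustrative): an XIO holds at most XIO_INTERNAL_PAGES
 * pages, so a large request may succeed with xio_bytes smaller than the
 * amount asked for; callers that must cover the whole buffer loop.
 *
 *      while (resid > 0) {
 *              if (xio_init_ubuf(&xio, ptr, resid, XIOF_READ) != 0)
 *                      break;
 *              (consume xio.xio_bytes bytes from the XIO)
 *              ptr += xio.xio_bytes;
 *              resid -= xio.xio_bytes;
 *              xio_release(&xio);
 *      }
 */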

/*
 * Initialize an XIO given a kernelspace buffer. 0 is returned on success,
 * an error code on failure. The actual number of bytes that could be
 * accommodated in the XIO will be stored in xio_bytes and the page offset
 * will be stored in xio_offset.
 */
int
xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
{
        vm_offset_t addr;
        vm_paddr_t paddr;
        vm_page_t m;
        int i;
        int n;

        addr = trunc_page((vm_offset_t)kbase);
        xio->xio_flags = 0;
        xio->xio_offset = (vm_offset_t)kbase & PAGE_MASK;
        xio->xio_bytes = 0;
        xio->xio_pages = xio->xio_internal_pages;
        xio->xio_error = 0;
        if ((n = PAGE_SIZE - xio->xio_offset) > kbytes)
                n = kbytes;
        for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
                if ((paddr = pmap_kextract(addr)) == 0)
                        break;
                crit_enter();
                m = PHYS_TO_VM_PAGE(paddr);
                vm_page_hold(m);
                crit_exit();
                xio->xio_pages[i] = m;
                kbytes -= n;
                xio->xio_bytes += n;
                if ((n = kbytes) > PAGE_SIZE)
                        n = PAGE_SIZE;
                addr += PAGE_SIZE;
        }
        xio->xio_npages = i;

        /*
         * If a failure occurred clean out what we loaded and return EFAULT.
         * Return 0 on success.
         */
        if (i < XIO_INTERNAL_PAGES && n) {
                xio_release(xio);
                xio->xio_error = EFAULT;
        }
        return(xio->xio_error);
}
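
/*
 * Usage sketch (illustrative; the kmalloc/M_TEMP usage is an assumption
 * of the example): the buffer must be mapped kernel memory that
 * pmap_kextract() can resolve, e.g. a kmalloc'd buffer, not a user
 * pointer.
 *
 *      char *kbuf = kmalloc(len, M_TEMP, M_WAITOK);
 *      struct xio xio;
 *
 *      bcopy(src, kbuf, len);
 *      if (xio_init_kbuf(&xio, kbuf, len) == 0) {
 *              (hand &xio to the consumer, then)
 *              xio_release(&xio);
 *      }
 *      kfree(kbuf, M_TEMP);
 */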

/*
 * Initialize an XIO given an array of vm_page pointers. The caller is
 * responsible for any modified state changes for the pages.
 */
int
xio_init_pages(xio_t xio, struct vm_page **mbase, int npages, int xflags)
{
        int i;

        KKASSERT(npages <= XIO_INTERNAL_PAGES);

        xio->xio_flags = xflags;
        xio->xio_offset = 0;
        xio->xio_bytes = npages * PAGE_SIZE;
        xio->xio_pages = xio->xio_internal_pages;
        xio->xio_npages = npages;
        xio->xio_error = 0;
        crit_enter();
        for (i = 0; i < npages; ++i) {
                vm_page_hold(mbase[i]);
                xio->xio_pages[i] = mbase[i];
        }
        crit_exit();
        return(0);
}
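
/*
 * Usage sketch (illustrative): unlike the ubuf/kbuf initializers this
 * always produces a page-aligned XIO (xio_offset is 0) covering npages
 * whole pages; the pages are only held here, so busying and modified
 * state remain the caller's responsibility.
 *
 *      vm_page_t marray[XIO_INTERNAL_PAGES];
 *      struct xio xio;
 *
 *      (gather n <= XIO_INTERNAL_PAGES pages into marray)
 *      xio_init_pages(&xio, marray, n, XIOF_READ);
 *      (use the XIO, then)
 *      xio_release(&xio);
 */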

/*
 * Clean up an XIO so it can be destroyed. The pages associated with the
 * XIO are released.
 */
void
xio_release(xio_t xio)
{
        int i;
        vm_page_t m;

        crit_enter();
        for (i = 0; i < xio->xio_npages; ++i) {
                m = xio->xio_pages[i];
                if (xio->xio_flags & XIOF_WRITE)
                        vm_page_dirty(m);
                vm_page_unhold(m);
        }
        crit_exit();
        xio->xio_offset = 0;
        xio->xio_npages = 0;
        xio->xio_bytes = 0;
        xio->xio_error = ENOBUFS;
}
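
/*
 * Note: after xio_release() the XIO is empty (xio_bytes is 0), so a
 * stale copy attempt returns EFAULT instead of touching pages that are
 * no longer held, and xio_error is left at ENOBUFS as a sentinel; an
 * XIO must be reinitialized with one of the xio_init_*() functions
 * before it can be reused.
 */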

/*
 * Copy data between an XIO and a UIO. If the UIO represents userspace it
 * must be relative to the current context.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified. The UIO is updated to reflect the copy.
 *
 * UIO_READ     xio -> uio
 * UIO_WRITE    uio -> xio
 */
int
xio_uio_copy(xio_t xio, int uoffset, struct uio *uio, size_t *sizep)
{
        size_t bytes;
        int error;

        bytes = xio->xio_bytes - uoffset;
        if (bytes > uio->uio_resid)
                bytes = uio->uio_resid;
        KKASSERT(bytes >= 0);
        error = uiomove_fromphys(xio->xio_pages, xio->xio_offset + uoffset,
                                 bytes, uio);
        if (error == 0)
                *sizep = bytes;
        else
                *sizep = 0;
        return(error);
}
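
/*
 * Usage sketch (illustrative): a typical read-side path, moving data
 * staged in an XIO out to the uio supplied by the file layer.
 *
 *      size_t done;
 *      int error;
 *
 *      error = xio_uio_copy(&xio, 0, uio, &done);
 *      (with uio->uio_rw == UIO_READ this copies xio -> uio)
 */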

/*
 * Copy the specified number of bytes from the xio to a userland
 * buffer. Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtou(xio_t xio, int uoffset, void *uptr, int bytes)
{
        int i;
        int n;
        int error;
        int offset;
        vm_page_t m;
        struct lwbuf *lwb;

        if (uoffset + bytes > xio->xio_bytes)
                return(EFAULT);

        offset = (xio->xio_offset + uoffset) & PAGE_MASK;
        if ((n = PAGE_SIZE - offset) > bytes)
                n = bytes;

        error = 0;
        for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
             i < xio->xio_npages;
             ++i
        ) {
                m = xio->xio_pages[i];
                lwb = lwbuf_alloc(m);
                error = copyout((char *)lwbuf_kva(lwb) + offset, uptr, n);
                lwbuf_free(lwb);
                if (error)
                        break;
                bytes -= n;
                uptr = (char *)uptr + n;
                if (bytes == 0)
                        break;
                if ((n = bytes) > PAGE_SIZE)
                        n = PAGE_SIZE;
                offset = 0;
        }
        return(error);
}
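
/*
 * Usage sketch (illustrative; CHUNK is a hypothetical limit): uoffset
 * allows a consumer to drain the XIO incrementally without modifying
 * the XIO itself.
 *
 *      int off;
 *      int chunk;
 *
 *      for (off = 0; off < xio.xio_bytes; off += chunk) {
 *              chunk = xio.xio_bytes - off;
 *              if (chunk > CHUNK)
 *                      chunk = CHUNK;
 *              if (xio_copy_xtou(&xio, off, (char *)uptr + off, chunk))
 *                      break;
 *      }
 */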

/*
 * Copy the specified number of bytes from the xio to a kernel
 * buffer. Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes)
{
        int i;
        int n;
        int error;
        int offset;
        vm_page_t m;
        struct lwbuf *lwb;

        if (bytes + uoffset > xio->xio_bytes)
                return(EFAULT);

        offset = (xio->xio_offset + uoffset) & PAGE_MASK;
        if ((n = PAGE_SIZE - offset) > bytes)
                n = bytes;

        error = 0;
        for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
             i < xio->xio_npages;
             ++i
        ) {
                m = xio->xio_pages[i];
                lwb = lwbuf_alloc(m);
                bcopy((char *)lwbuf_kva(lwb) + offset, kptr, n);
                lwbuf_free(lwb);
                bytes -= n;
                kptr = (char *)kptr + n;
                if (bytes == 0)
                        break;
                if ((n = bytes) > PAGE_SIZE)
                        n = PAGE_SIZE;
                offset = 0;
        }
        return(error);
}

/*
 * Copy the specified number of bytes from userland to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_utox(xio_t xio, int uoffset, const void *uptr, int bytes)
{
        int i;
        int n;
        int error;
        int offset;
        vm_page_t m;
        struct lwbuf *lwb;

        if (uoffset + bytes > xio->xio_bytes)
                return(EFAULT);

        offset = (xio->xio_offset + uoffset) & PAGE_MASK;
        if ((n = PAGE_SIZE - offset) > bytes)
                n = bytes;

        error = 0;
        for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
             i < xio->xio_npages;
             ++i
        ) {
                m = xio->xio_pages[i];
                lwb = lwbuf_alloc(m);
                error = copyin(uptr, (char *)lwbuf_kva(lwb) + offset, n);
                lwbuf_free(lwb);
                if (error)
                        break;
                bytes -= n;
                uptr = (const char *)uptr + n;
                if (bytes == 0)
                        break;
                if ((n = bytes) > PAGE_SIZE)
                        n = PAGE_SIZE;
                offset = 0;
        }
        return(error);
}

/*
 * Copy the specified number of bytes from the kernel to the xio.
 * Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_ktox(xio_t xio, int uoffset, const void *kptr, int bytes)
{
        int i;
        int n;
        int error;
        int offset;
        vm_page_t m;
        struct lwbuf *lwb;

        if (uoffset + bytes > xio->xio_bytes)
                return(EFAULT);

        offset = (xio->xio_offset + uoffset) & PAGE_MASK;
        if ((n = PAGE_SIZE - offset) > bytes)
                n = bytes;

        error = 0;
        for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT;
             i < xio->xio_npages;
             ++i
        ) {
                m = xio->xio_pages[i];
                lwb = lwbuf_alloc(m);
                bcopy(kptr, (char *)lwbuf_kva(lwb) + offset, n);
                lwbuf_free(lwb);
                bytes -= n;
                kptr = (const char *)kptr + n;
                if (bytes == 0)
                        break;
                if ((n = bytes) > PAGE_SIZE)
                        n = PAGE_SIZE;
                offset = 0;
        }
        return(error);
}
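
/*
 * Usage sketch (illustrative): utox/ktox fill pages the XIO already
 * holds. With XIOF_WRITE set on the XIO, xio_release() dirties the
 * pages afterwards, so the writeback state stays consistent.
 *
 *      error = xio_copy_ktox(&xio, 0, kptr, bytes);
 *      xio_release(&xio);
 */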