kernel/vfs: Remove some unused variables.
[dragonfly.git] / sys / vfs / puffs / puffs_io.c
/*	$NetBSD: puffs_vnops.c,v 1.154 2011/07/04 08:07:30 manu Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by the
 * Google Summer of Code program and the Ulla Tuominen Foundation.
 * The Google SoC project was mentored by Bill Studenmund.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/thread2.h>

#include <vfs/puffs/puffs_msgif.h>
#include <vfs/puffs/puffs_sys.h>

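/*
 * Fill in the common read/write request arguments: I/O flags, number of
 * bytes to move, starting offset and the caller's credentials (converted
 * to the puffs wire representation).
 */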
#define RWARGS(cont, iofl, move, offset, creds)				\
	(cont)->pvnr_ioflag = (iofl);					\
	(cont)->pvnr_resid = (move);					\
	(cont)->pvnr_offset = (offset);					\
	puffs_credcvt(&(cont)->pvnr_cred, creds)

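/*
 * Uncached read: ship PUFFS_VN_READ requests directly to the userspace
 * server and uiomove() the returned data, looping until the uio is
 * exhausted, an error occurs or the server signals EOF via a non-zero
 * residual.
 */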
int
puffs_directread(struct vnode *vp, struct uio *uio, int ioflag,
	struct ucred *cred)
{
	PUFFS_MSG_VARS(vn, read);
	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
	size_t tomove, argsize;
	int error;

	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	read_msg = NULL;
	error = 0;

	/*
	 * We're operating uncached, so do the read in the
	 * old-fashioned style, i.e. with explicit read operations.
	 */

	tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
	argsize = sizeof(struct puffs_vnmsg_read);
	puffs_msgmem_alloc(argsize + tomove, &park_read,
	    (void *)&read_msg, 1);

	error = 0;
	while (uio->uio_resid > 0) {
		tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
		memset(read_msg, 0, argsize); /* XXX: touser KASSERT */
		RWARGS(read_msg, ioflag, tomove,
		    uio->uio_offset, cred);
		puffs_msg_setinfo(park_read, PUFFSOP_VN,
		    PUFFS_VN_READ, VPTOPNC(vp));
		puffs_msg_setdelta(park_read, tomove);

		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_read, vp->v_data,
		    NULL, error);
		error = checkerr(pmp, error, __func__);
		if (error)
			break;

		if (read_msg->pvnr_resid > tomove) {
			puffs_senderr(pmp, PUFFS_ERR_READ,
			    E2BIG, "resid grew", VPTOPNC(vp));
			error = EPROTO;
			break;
		}

		error = uiomove(read_msg->pvnr_data,
		    tomove - read_msg->pvnr_resid, uio);

		/*
		 * If the file ran out of data (EOF), the resid returned
		 * from userspace is != 0; the error case is obvious.
		 * Either way, stop looping.
		 */
		if (error || read_msg->pvnr_resid)
			break;
	}

	puffs_msgmem_release(park_read);

	return error;
}

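/*
 * Uncached write: copy the caller's data into the request buffer and ship
 * PUFFS_VN_WRITE requests directly to the userspace server, looping until
 * the uio is exhausted or an error occurs.  A short write from the server
 * is treated as an error.
 */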
int
puffs_directwrite(struct vnode *vp, struct uio *uio, int ioflag,
	struct ucred *cred)
{
	PUFFS_MSG_VARS(vn, write);
	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
	size_t tomove, argsize;
	int error;

	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	error = 0;
	write_msg = NULL;

	/* tomove is non-increasing */
	tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
	argsize = sizeof(struct puffs_vnmsg_write) + tomove;
	puffs_msgmem_alloc(argsize, &park_write, (void *)&write_msg, 1);

	while (uio->uio_resid > 0) {
		/* move data to buffer */
		tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
		memset(write_msg, 0, argsize); /* XXX: touser KASSERT */
		RWARGS(write_msg, ioflag, tomove,
		    uio->uio_offset, cred);
		error = uiomove(write_msg->pvnr_data, tomove, uio);
		if (error)
			break;

		/* move buffer to userspace */
		puffs_msg_setinfo(park_write, PUFFSOP_VN,
		    PUFFS_VN_WRITE, VPTOPNC(vp));
		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_write, vp->v_data,
		    NULL, error);
		error = checkerr(pmp, error, __func__);
		if (error)
			break;

		if (write_msg->pvnr_resid > tomove) {
			puffs_senderr(pmp, PUFFS_ERR_WRITE,
			    E2BIG, "resid grew", VPTOPNC(vp));
			error = EPROTO;
			break;
		}

		if (PUFFS_USE_PAGECACHE(pmp))
			KKASSERT(vp->v_filesize >= uio->uio_offset);

		/* didn't move everything?  bad userspace.  bail */
		if (write_msg->pvnr_resid != 0) {
			error = EIO;
			break;
		}
	}
	puffs_msgmem_release(park_write);

	return error;
}

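/*
 * Completion handler for the synchronous bios issued below: clear the
 * bio flags and signal buffer completion.
 */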
static void
puffs_iodone(struct bio *bio)
{
	bio->bio_flags = 0;
	bpdone(bio->bio_buf, 0);
}

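/*
 * Read through the buffer cache: map the uio onto logical file blocks,
 * bring each block in via getblk()/puffs_doio() when it is not already
 * B_CACHE, and copy out of the buffer until the uio is exhausted or EOF
 * is reached.
 */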
int
puffs_bioread(struct vnode *vp, struct uio *uio, int ioflag,
	struct ucred *cred)
{
	int biosize = vp->v_mount->mnt_stat.f_iosize;
	struct buf *bp;
	struct vattr vattr;
	off_t lbn, loffset, fsize;
	size_t n;
	int boff;
	int error = 0;

	KKASSERT(uio->uio_rw == UIO_READ);
	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	/*
	 * Cache consistency can only be maintained approximately.
	 *
	 * GETATTR is called to synchronize the file size.
	 *
	 * NOTE: In the normal case the attribute cache is not
	 *	 cleared which means GETATTR may use cached data and
	 *	 not immediately detect changes made on the server.
	 */

	error = VOP_GETATTR(vp, &vattr);
	if (error)
		return error;

	/*
	 * Loop until uio exhausted or we hit EOF
	 */
	do {
		bp = NULL;

		lbn = uio->uio_offset / biosize;
		boff = uio->uio_offset & (biosize - 1);
		loffset = lbn * biosize;
		fsize = puffs_meta_getsize(vp);

		if (loffset + boff >= fsize) {
			n = 0;
			break;
		}
		bp = getblk(vp, loffset, biosize, 0, 0);

		if (bp == NULL)
			return EINTR;

		/*
		 * If B_CACHE is not set, we must issue the read.  If this
		 * fails, we return an error.
		 */
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_cmd = BUF_CMD_READ;
			bp->b_bio2.bio_done = puffs_iodone;
			bp->b_bio2.bio_flags |= BIO_SYNC;
			vfs_busy_pages(vp, bp);
			error = puffs_doio(vp, &bp->b_bio2, uio->uio_td);
			if (error) {
				brelse(bp);
				return error;
			}
		}

		/*
		 * boff is the offset into the current bp.  Figure out how
		 * many bytes we can copy out of the bp.  Note that the
		 * result is NOT necessarily DEV_BSIZE aligned.
		 *
		 * Then figure out how many bytes we can copy into the uio.
		 */
		n = biosize - boff;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (loffset + boff + n > fsize)
			n = fsize - loffset - boff;

		if (n > 0)
			error = uiomove(bp->b_data + boff, n, uio);
		if (bp)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);

	return error;
}

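/*
 * Write through the buffer cache: copy the uio into logical file blocks,
 * extending the file and maintaining the dirty region of each buffer, and
 * either bwrite() (IO_SYNC) or bdwrite() the result.  Modelled after the
 * NFS bio write path.
 */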
int
puffs_biowrite(struct vnode *vp, struct uio *uio, int ioflag,
	struct ucred *cred)
{
	int biosize = vp->v_mount->mnt_stat.f_iosize;
	struct buf *bp;
	struct vattr vattr;
	off_t loffset, fsize;
	int boff, bytes;
	int error = 0;
	int bcount;
	int trivial;

	KKASSERT(uio->uio_rw == UIO_WRITE);
	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	/*
	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
	 * get the append lock.
	 *
	 * We need to hold the vnode exclusively if we intend to modify the
	 * file size, in order to guarantee the append point with multiple
	 * contending writers.
	 */
	if (ioflag & IO_APPEND) {
		/* XXXDF relock if necessary */
		KKASSERT(vn_islocked(vp) == LK_EXCLUSIVE);
		error = VOP_GETATTR(vp, &vattr);
		if (error)
			return error;
		uio->uio_offset = puffs_meta_getsize(vp);
	}

	do {
		boff = uio->uio_offset & (biosize-1);
		loffset = uio->uio_offset - boff;
		bytes = (int)szmin((unsigned)(biosize - boff), uio->uio_resid);
again:
		/*
		 * Handle direct append and file extension cases, calculate
		 * unaligned buffer size.  When extending B_CACHE will be
		 * set if possible.  See UIO_NOCOPY note below.
		 */
		fsize = puffs_meta_getsize(vp);
		if (uio->uio_offset + bytes > fsize) {
			trivial = (uio->uio_segflg != UIO_NOCOPY &&
				   uio->uio_offset <= fsize);
			puffs_meta_setsize(vp, uio->uio_offset + bytes,
					   trivial);
		}
		bp = getblk(vp, loffset, biosize, 0, 0);
		if (bp == NULL) {
			error = EINTR;
			break;
		}

		/*
		 * Actual bytes in buffer which we care about
		 */
		if (loffset + biosize < fsize)
			bcount = biosize;
		else
			bcount = (int)(fsize - loffset);

		/*
		 * Avoid a read by setting B_CACHE where the data we
		 * intend to write covers the entire buffer.  Note
		 * that the buffer may have been set to B_CACHE by
		 * puffs_meta_setsize() above or otherwise inherited the
		 * flag, but if B_CACHE isn't set the buffer may be
		 * uninitialized and must be zero'd to accommodate
		 * future seek+write's.
		 *
		 * See the comments in kern/vfs_bio.c's getblk() for
		 * more information.
		 *
		 * When doing a UIO_NOCOPY write the buffer is not
		 * overwritten and we cannot just set B_CACHE unconditionally
		 * for full-block writes.
		 */
		if (boff == 0 && bytes == biosize &&
		    uio->uio_segflg != UIO_NOCOPY) {
			bp->b_flags |= B_CACHE;
			bp->b_flags &= ~(B_ERROR | B_INVAL);
		}

		/*
		 * b_resid may be set due to file EOF if we extended out.
		 * The puffs_doio() read path will zero the difference
		 * anyway, so just acknowledge the fact and set b_resid to 0.
		 */
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_cmd = BUF_CMD_READ;
			bp->b_bio2.bio_done = puffs_iodone;
			bp->b_bio2.bio_flags |= BIO_SYNC;
			vfs_busy_pages(vp, bp);
			error = puffs_doio(vp, &bp->b_bio2, uio->uio_td);
			if (error) {
				brelse(bp);
				break;
			}
			bp->b_resid = 0;
		}

		/*
		 * If dirtyend exceeds file size, chop it down.  This should
		 * not normally occur but there is an append race where it
		 * might occur XXX, so we log it.
		 *
		 * If the chopping creates a reverse-indexed or degenerate
		 * situation with dirtyoff/end, we 0 both of them.
		 */
		if (bp->b_dirtyend > bcount) {
			kprintf("PUFFS append race @%08llx:%d\n",
				(long long)bp->b_bio2.bio_offset,
				bp->b_dirtyend - bcount);
			bp->b_dirtyend = bcount;
		}

		if (bp->b_dirtyoff >= bp->b_dirtyend)
			bp->b_dirtyoff = bp->b_dirtyend = 0;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 *
		 * While it is possible to merge discontiguous writes due to
		 * our having a B_CACHE buffer (and thus valid read data
		 * for the hole), we don't because it could lead to
		 * significant cache coherency problems with multiple clients,
		 * especially if locking is implemented later on.
		 *
		 * As an optimization we could theoretically maintain
		 * a linked list of discontinuous areas, but we would still
		 * have to commit them separately so there isn't much
		 * advantage to it except perhaps a bit of asynchronization.
		 */
		if (bp->b_dirtyend > 0 &&
		    (boff > bp->b_dirtyend ||
		     (boff + bytes) < bp->b_dirtyoff)
		   ) {
			if (bwrite(bp) == EINTR) {
				error = EINTR;
				break;
			}
			goto again;
		}

		error = uiomove(bp->b_data + boff, bytes, uio);

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.  Since write clustering does
		 * not work for the stage 1 data write, only the stage 2
		 * commit rpc, we have to clear B_CLUSTEROK as well.
		 */
		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Only update dirtyoff/dirtyend if not a degenerate
		 * condition.
		 *
		 * The underlying VM pages have been marked valid by
		 * virtue of acquiring the bp.  Because the entire buffer
		 * is marked dirty we do not have to worry about cleaning
		 * out the related dirty bits (and wouldn't really know
		 * how to deal with byte ranges anyway)
		 */
		if (bytes) {
			if (bp->b_dirtyend > 0) {
				bp->b_dirtyoff = imin(boff, bp->b_dirtyoff);
				bp->b_dirtyend = imax(boff + bytes,
						      bp->b_dirtyend);
			} else {
				bp->b_dirtyoff = boff;
				bp->b_dirtyend = boff + bytes;
			}
		}

		if (ioflag & IO_SYNC) {
			if (ioflag & IO_INVAL)
				bp->b_flags |= B_NOCACHE;
			error = bwrite(bp);
			if (error)
				break;
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && bytes > 0);

	return error;
}

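/*
 * Perform the actual I/O for a buffer cache buffer by translating the bio
 * into a uio and handing it to puffs_directread()/puffs_directwrite().
 * Called from the bioread/biowrite paths above for buffers that miss the
 * cache or need to be flushed.
 */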
int
puffs_doio(struct vnode *vp, struct bio *bio, struct thread *td)
{
	struct buf *bp = bio->bio_buf;
	struct ucred *cred;
	struct uio *uiop;
	struct uio uio;
	struct iovec io;
	size_t n;
	int error = 0;

	if (td != NULL && td->td_proc != NULL)
		cred = td->td_proc->p_ucred;
	else
		cred = proc0.p_ucred;

	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = td;

	/*
	 * clear B_ERROR and B_INVAL state prior to initiating the I/O.  We
	 * do this here so we do not have to do it in all the code that
	 * calls us.
	 */
	bp->b_flags &= ~(B_ERROR | B_INVAL);

	KASSERT(bp->b_cmd != BUF_CMD_DONE,
		("puffs_doio: bp %p already marked done!", bp));

	if (bp->b_cmd == BUF_CMD_READ) {
		io.iov_len = uiop->uio_resid = (size_t)bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;

		uiop->uio_offset = bio->bio_offset;
		error = puffs_directread(vp, uiop, 0, cred);
		if (error == 0 && uiop->uio_resid) {
			n = (size_t)bp->b_bcount - uiop->uio_resid;
			bzero(bp->b_data + n, bp->b_bcount - n);
			uiop->uio_resid = 0;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
		bp->b_resid = uiop->uio_resid;
	} else {
		KKASSERT(bp->b_cmd == BUF_CMD_WRITE);
		if (bio->bio_offset + bp->b_dirtyend > puffs_meta_getsize(vp))
			bp->b_dirtyend = puffs_meta_getsize(vp) -
					 bio->bio_offset;

		if (bp->b_dirtyend > bp->b_dirtyoff) {
			io.iov_len = uiop->uio_resid = bp->b_dirtyend
			    - bp->b_dirtyoff;
			uiop->uio_offset = bio->bio_offset + bp->b_dirtyoff;
			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
			uiop->uio_rw = UIO_WRITE;

			error = puffs_directwrite(vp, uiop, 0, cred);

			if (error == EINTR
			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
				crit_enter();
				bp->b_flags &= ~(B_INVAL|B_NOCACHE);
				if ((bp->b_flags & B_PAGING) == 0)
					bdirty(bp);
				if (error)
					bp->b_flags |= B_EINTR;
				crit_exit();
			} else {
				if (error) {
					bp->b_flags |= B_ERROR;
					bp->b_error = error;
				}
				bp->b_dirtyoff = bp->b_dirtyend = 0;
			}
			bp->b_resid = uiop->uio_resid;
		} else {
			bp->b_resid = 0;
		}
	}

	biodone(bio);
	KKASSERT(bp->b_cmd == BUF_CMD_DONE);
	if (bp->b_flags & B_EINTR)
		return (EINTR);
	if (bp->b_flags & B_ERROR)
		return (bp->b_error ? bp->b_error : EIO);
	return (0);
}