Simplify vn_lock(), VOP_LOCK(), and VOP_UNLOCK() by removing the thread_t
[dragonfly.git] / sys / vfs / ufs / ffs_rawread.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 2000-2003 Tor Egge
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: src/sys/ufs/ffs/ffs_rawread.c,v 1.3.2.2 2003/05/29 06:15:35 alc Exp $
ca466bae 27 * $DragonFly: src/sys/vfs/ufs/ffs_rawread.c,v 1.23 2006/05/05 21:15:10 dillon Exp $
984263bc
MD
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/fcntl.h>
33#include <sys/file.h>
34#include <sys/stat.h>
35#include <sys/proc.h>
36#include <sys/mount.h>
37#include <sys/namei.h>
38#include <sys/vnode.h>
39#include <sys/conf.h>
40#include <sys/filio.h>
41#include <sys/ttycom.h>
42#include <sys/buf.h>
1f2de5d4
MD
43#include "quota.h"
44#include "inode.h"
45#include "fs.h"
984263bc
MD
46
47#include <machine/limits.h>
48#include <vm/vm.h>
49#include <vm/vm_extern.h>
50#include <vm/vm_object.h>
51#include <sys/kernel.h>
52#include <sys/sysctl.h>
53
2d53bf02
MD
54static int ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t offset,
55 size_t len, struct thread *td, struct buf *bp,
3591bbc6 56 int *baseticks);
984263bc
MD
57static int ffs_rawread_main(struct vnode *vp,
58 struct uio *uio);
59
2d53bf02 60static int ffs_rawread_sync(struct vnode *vp, struct thread *td);
984263bc
MD
61
62int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
63
64void ffs_rawread_setup(void);
65
81b5c339 66static void ffs_rawreadwakeup(struct bio *bio);
984263bc
MD
67
68
69SYSCTL_DECL(_vfs_ffs);
70
71static int ffsrawbufcnt = 4;
72SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
73 "Buffers available for raw reads");
74
75static int allowrawread = 1;
76SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
77 "Flag to enable raw reads");
78
79static int rawreadahead = 1;
80SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
81 "Flag to enable readahead for long raw reads");
82
83
84void
85ffs_rawread_setup(void)
86{
87 ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
88}
89
90
91static int
2d53bf02 92ffs_rawread_sync(struct vnode *vp, struct thread *td)
984263bc 93{
984263bc
MD
94 int error;
95 int upgraded;
96
97 /* Check for dirty mmap, pending writes and dirty buffers */
165dba55 98 crit_enter();
81b5c339 99 if (vp->v_track_write.bk_active > 0 ||
6bae6177 100 !RB_EMPTY(&vp->v_rbdirty_tree) ||
984263bc 101 (vp->v_flag & VOBJDIRTY) != 0) {
165dba55 102 crit_exit();
984263bc 103
2d53bf02 104 if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) {
984263bc
MD
105 upgraded = 1;
106 /* Upgrade to exclusive lock, this might block */
ca466bae 107 VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE);
984263bc
MD
108 } else
109 upgraded = 0;
110
111 /* Attempt to msync mmap() regions to clean dirty mmap */
112 if ((vp->v_flag & VOBJDIRTY) != 0) {
113 struct vm_object *obj;
7540ab49 114 if ((obj = vp->v_object) != NULL)
984263bc
MD
115 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
116 }
117
118 /* Wait for pending writes to complete */
165dba55 119 crit_enter();
81b5c339
MD
120 while (vp->v_track_write.bk_active) {
121 vp->v_track_write.bk_waitflag = 1;
122 error = tsleep(&vp->v_track_write, 0, "rawrdfls", 0);
984263bc 123 if (error != 0) {
165dba55 124 crit_exit();
984263bc 125 if (upgraded != 0)
ca466bae 126 VOP_LOCK(vp, LK_DOWNGRADE);
984263bc
MD
127 return (error);
128 }
129 }
130 /* Flush dirty buffers */
6bae6177 131 if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
165dba55 132 crit_exit();
2d53bf02 133 if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) != 0) {
984263bc 134 if (upgraded != 0)
ca466bae 135 VOP_LOCK(vp, LK_DOWNGRADE);
984263bc
MD
136 return (error);
137 }
165dba55 138 crit_enter();
81b5c339 139 if (vp->v_track_write.bk_active > 0 ||
6bae6177 140 !RB_EMPTY(&vp->v_rbdirty_tree))
984263bc
MD
141 panic("ffs_rawread_sync: dirty bufs");
142 }
165dba55 143 crit_exit();
984263bc 144 if (upgraded != 0)
ca466bae 145 VOP_LOCK(vp, LK_DOWNGRADE);
984263bc 146 } else {
165dba55 147 crit_exit();
984263bc
MD
148 }
149 return 0;
150}
151
152
153static int
81b5c339 154ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t loffset,
2d53bf02 155 size_t len, struct thread *td, struct buf *bp,
3591bbc6 156 int *baseticks)
984263bc
MD
157{
158 int error;
3591bbc6 159 int iolen;
984263bc
MD
160 int blockoff;
161 int bsize;
162 struct vnode *dp;
163 int bforwards;
164
165 bsize = vp->v_mount->mnt_stat.f_iosize;
3591bbc6
MD
166
167 /*
168 * Make sure it fits into the pbuf
169 */
170 iolen = (int)(intptr_t)udata & PAGE_MASK;
171 if (len + iolen > bp->b_kvasize) {
172 len = bp->b_kvasize;
984263bc 173 if (iolen != 0)
3591bbc6 174 len -= PAGE_SIZE;
984263bc 175 }
10f3fee5 176 bp->b_flags &= ~B_ERROR;
81b5c339 177 bp->b_loffset = loffset;
81b5c339 178 bp->b_bio2.bio_offset = NOOFFSET;
81b5c339
MD
179 bp->b_bio2.bio_done = ffs_rawreadwakeup;
180
aa73238b
MD
181 blockoff = (loffset % bsize) / DEV_BSIZE;
182
54078292 183 error = VOP_BMAP(vp, bp->b_loffset, &dp, &bp->b_bio2.bio_offset,
81b5c339 184 &bforwards, NULL);
984263bc
MD
185 if (error != 0) {
186 return error;
187 }
54078292 188 if (bp->b_bio2.bio_offset == NOOFFSET) {
81b5c339
MD
189 /*
190 * Fill holes with NULs to preserve semantics
191 */
3591bbc6
MD
192 if (len + blockoff * DEV_BSIZE > bsize)
193 len = bsize - blockoff * DEV_BSIZE;
984263bc 194
3591bbc6 195 if (vmapbuf(bp, udata, len) < 0)
984263bc
MD
196 return EFAULT;
197
2d53bf02
MD
198 if (ticks - *baseticks >= hogticks) {
199 *baseticks = ticks;
984263bc 200 uio_yield();
2d53bf02 201 }
3591bbc6 202 bzero(bp->b_data, bp->b_bcount);
984263bc
MD
203
204 /* Mark operation completed (similar to bufdone()) */
205
206 bp->b_resid = 0;
984263bc
MD
207 return 0;
208 }
209
3591bbc6
MD
210 if (len + blockoff * DEV_BSIZE > bforwards)
211 len = bforwards - blockoff * DEV_BSIZE;
54078292 212 bp->b_bio2.bio_offset += blockoff * DEV_BSIZE;
984263bc 213
3591bbc6 214 if (vmapbuf(bp, udata, len) < 0)
984263bc
MD
215 return EFAULT;
216
81b5c339
MD
217 /*
218 * Access the block device layer using the device vnode (dp) and
219 * the translated block number (bio2) instead of the logical block
220 * number (bio1).
221 *
222 * Even though we are bypassing the vnode layer, we still
223 * want the vnode state to indicate that an I/O on its behalf
224 * is in progress.
225 */
10f3fee5 226 bp->b_cmd = BUF_CMD_READ;
81b5c339
MD
227 bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
228 vn_strategy(dp, &bp->b_bio2);
984263bc
MD
229 return 0;
230}
231
984263bc 232static int
2d53bf02 233ffs_rawread_main(struct vnode *vp, struct uio *uio)
984263bc
MD
234{
235 int error, nerror;
236 struct buf *bp, *nbp, *tbp;
3591bbc6 237 int iolen;
2d53bf02 238 int baseticks = ticks;
984263bc 239 caddr_t udata;
54078292 240 int resid;
984263bc 241 off_t offset;
2d53bf02 242 struct thread *td;
984263bc 243
2d53bf02 244 td = uio->uio_td ? uio->uio_td : curthread;
984263bc
MD
245 udata = uio->uio_iov->iov_base;
246 resid = uio->uio_resid;
247 offset = uio->uio_offset;
248
984263bc
MD
249 error = 0;
250 nerror = 0;
251
252 bp = NULL;
253 nbp = NULL;
984263bc
MD
254
255 while (resid > 0) {
256
257 if (bp == NULL) { /* Setup first read */
258 /* XXX: Leave some bufs for swap */
259 bp = getpbuf(&ffsrawbufcnt);
2d53bf02 260 error = ffs_rawread_readahead(vp, udata, offset, resid,
3591bbc6 261 td, bp, &baseticks);
984263bc
MD
262 if (error != 0)
263 break;
264
265 if (resid > bp->b_bufsize) { /* Setup fist readahead */
266 /* XXX: Leave bufs for swap */
267 if (rawreadahead != 0)
268 nbp = trypbuf(&ffsrawbufcnt);
269 else
270 nbp = NULL;
271 if (nbp != NULL) {
2d53bf02
MD
272 nerror = ffs_rawread_readahead(
273 vp,
274 udata + bp->b_bufsize,
275 offset + bp->b_bufsize,
276 resid - bp->b_bufsize,
3591bbc6 277 td, nbp, &baseticks);
984263bc
MD
278 if (nerror) {
279 relpbuf(nbp, &ffsrawbufcnt);
280 nbp = NULL;
281 }
282 }
283 }
284 }
285
165dba55 286 crit_enter();
10f3fee5 287 while (bp->b_cmd != BUF_CMD_DONE)
81b5c339 288 tsleep((caddr_t)&bp->b_bio2, 0, "rawrd", 0);
165dba55 289 crit_exit();
984263bc
MD
290
291 vunmapbuf(bp);
292
293 iolen = bp->b_bcount - bp->b_resid;
294 if (iolen == 0 && (bp->b_flags & B_ERROR) == 0) {
295 nerror = 0; /* Ignore possible beyond EOF error */
296 break; /* EOF */
297 }
298
299 if ((bp->b_flags & B_ERROR) != 0) {
300 error = bp->b_error;
301 break;
302 }
81b5c339 303 clearbiocache(&bp->b_bio2);
984263bc
MD
304 resid -= iolen;
305 udata += iolen;
306 offset += iolen;
307 if (iolen < bp->b_bufsize) {
308 /* Incomplete read. Try to read remaining part */
2d53bf02
MD
309 error = ffs_rawread_readahead(
310 vp, udata, offset,
3591bbc6 311 bp->b_bufsize - iolen, td, bp, &baseticks);
984263bc
MD
312 if (error != 0)
313 break;
314 } else if (nbp != NULL) { /* Complete read with readahead */
315
316 tbp = bp;
317 bp = nbp;
318 nbp = tbp;
319
81b5c339 320 clearbiocache(&nbp->b_bio2);
984263bc
MD
321
322 if (resid <= bp->b_bufsize) { /* No more readaheads */
323 relpbuf(nbp, &ffsrawbufcnt);
324 nbp = NULL;
325 } else { /* Setup next readahead */
2d53bf02
MD
326 nerror = ffs_rawread_readahead(
327 vp, udata + bp->b_bufsize,
328 offset + bp->b_bufsize,
329 resid - bp->b_bufsize,
3591bbc6 330 td, nbp, &baseticks);
984263bc
MD
331 if (nerror != 0) {
332 relpbuf(nbp, &ffsrawbufcnt);
333 nbp = NULL;
334 }
335 }
336 } else if (nerror != 0) {/* Deferred Readahead error */
337 break;
338 } else if (resid > 0) { /* More to read, no readahead */
339 error = ffs_rawread_readahead(vp, udata, offset,
3591bbc6 340 resid, td, bp,
2d53bf02 341 &baseticks);
984263bc
MD
342 if (error != 0)
343 break;
344 }
345 }
346
347 if (bp != NULL)
348 relpbuf(bp, &ffsrawbufcnt);
349 if (nbp != NULL) { /* Run down readahead buffer */
165dba55 350 crit_enter();
10f3fee5 351 while (nbp->b_cmd != BUF_CMD_DONE)
81b5c339 352 tsleep(&nbp->b_bio2, 0, "rawrd", 0);
165dba55 353 crit_exit();
984263bc
MD
354 vunmapbuf(nbp);
355 relpbuf(nbp, &ffsrawbufcnt);
356 }
357
358 if (error == 0)
359 error = nerror;
984263bc
MD
360 uio->uio_iov->iov_base = udata;
361 uio->uio_resid = resid;
362 uio->uio_offset = offset;
363 return error;
364}
365
366
367int
368ffs_rawread(struct vnode *vp,
369 struct uio *uio,
370 int *workdone)
371{
372 if (allowrawread != 0 &&
373 uio->uio_iovcnt == 1 &&
374 uio->uio_segflg == UIO_USERSPACE &&
375 uio->uio_resid == uio->uio_iov->iov_len &&
dadab5e9
MD
376 (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_flags &
377 TDF_DEADLKTREAT) == 0) {
984263bc
MD
378 int secsize; /* Media sector size */
379 off_t filebytes; /* Bytes left of file */
380 int blockbytes; /* Bytes left of file in full blocks */
381 int partialbytes; /* Bytes in last partial block */
382 int skipbytes; /* Bytes not to read in ffs_rawread */
383 struct inode *ip;
384 int error;
385
386
387 /* Only handle sector aligned reads */
388 ip = VTOI(vp);
389 secsize = ip->i_devvp->v_rdev->si_bsize_phys;
390 if ((uio->uio_offset & (secsize - 1)) == 0 &&
391 (uio->uio_resid & (secsize - 1)) == 0) {
392
393 /* Sync dirty pages and buffers if needed */
394 error = ffs_rawread_sync(vp,
2d53bf02
MD
395 (uio->uio_td != NULL) ?
396 uio->uio_td : curthread);
984263bc
MD
397 if (error != 0)
398 return error;
399
400 /* Check for end of file */
401 if (ip->i_size > uio->uio_offset) {
402 filebytes = ip->i_size - uio->uio_offset;
403
404 /* No special eof handling needed ? */
405 if (uio->uio_resid <= filebytes) {
406 *workdone = 1;
407 return ffs_rawread_main(vp, uio);
408 }
409
410 partialbytes = ((unsigned int) ip->i_size) %
411 ip->i_fs->fs_bsize;
412 blockbytes = (int) filebytes - partialbytes;
413 if (blockbytes > 0) {
414 skipbytes = uio->uio_resid -
415 blockbytes;
416 uio->uio_resid = blockbytes;
417 error = ffs_rawread_main(vp, uio);
418 uio->uio_resid += skipbytes;
419 if (error != 0)
420 return error;
421 /* Read remaining part using buffer */
422 }
423 }
424 }
425 }
426 *workdone = 0;
427 return 0;
428}
429
430
431static void
81b5c339 432ffs_rawreadwakeup(struct bio *bio)
984263bc 433{
10f3fee5 434 bio->bio_buf->b_cmd = BUF_CMD_DONE;
81b5c339 435 wakeup(bio);
984263bc 436}