From 0a8aee15320dd5b32f4f0651e3dee5e934057b01 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 12 Feb 2010 19:11:36 -0800
Subject: [PATCH] kernel - TMPFS - Add infrastructure to main kernel to help
 support TMPFS

* Add buwrite(), similar to bdwrite() except it fakes the write, marks
  the pages as valid and dirty, and returns them to the VM system leaving
  the buffer cache buffer clean.  This is used by tmpfs in tmpfs_write()
  and allows the entire VM page cache to be used to cache dirty tmpfs
  data instead of just the buffer cache.

  Also add vm_page_set_validdirty() to support buwrite().

* Implement MNTK_SG_MPSAFE for future use by tmpfs.

* Fix a bug in swap_pager_strategy().  When the entire block being
  requested is sparse (has no swap assignments) the function was not
  properly biodone()'ing the original bio after zero-filling the space.
---
 sys/kern/vfs_bio.c    | 67 +++++++++++++++++++++++++++++++++++++++++++
 sys/kern/vfs_vopops.c |  3 +-
 sys/sys/buf.h         |  1 +
 sys/vm/swap_pager.c   | 17 +++++++----
 sys/vm/vm_page.c      | 13 +++++++++
 sys/vm/vm_page.h      |  1 +
 6 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c5c9e6e926..533f121705 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -96,6 +96,7 @@ struct buf *buf;		/* buffer header pool */
 
 static void vfs_clean_pages(struct buf *bp);
 static void vfs_clean_one_page(struct buf *bp, int pageno, vm_page_t m);
+static void vfs_dirty_one_page(struct buf *bp, int pageno, vm_page_t m);
 static void vfs_vmio_release(struct buf *bp);
 static int flushbufqueues(bufq_type_t q);
 static vm_page_t bio_page_alloc(vm_object_t obj, vm_pindex_t pg, int deficit);
@@ -1092,6 +1093,39 @@ bdwrite(struct buf *bp)
 	 */
 }
 
+/*
+ * Fake write - return pages to VM system as dirty, leave the buffer clean.
+ * This is used by tmpfs.
+ *
+ * It is important for any VFS using this routine to NOT use it for
+ * IO_SYNC or IO_ASYNC operations which occur when the system really
+ * wants to flush VM pages to backing store.
+ */
+void
+buwrite(struct buf *bp)
+{
+	vm_page_t m;
+	int i;
+
+	/*
+	 * Only works for VMIO buffers.  If the buffer is already
+	 * marked for delayed-write we can't avoid the bdwrite().
+	 */
+	if ((bp->b_flags & B_VMIO) == 0 || (bp->b_flags & B_DELWRI)) {
+		bdwrite(bp);
+		return;
+	}
+
+	/*
+	 * Set valid & dirty.
+	 */
+	for (i = 0; i < bp->b_xio.xio_npages; i++) {
+		m = bp->b_xio.xio_pages[i];
+		vfs_dirty_one_page(bp, i, m);
+	}
+	bqrelse(bp);
+}
+
 /*
  * bdirty:
  *
@@ -4096,6 +4130,39 @@ vfs_clean_one_page(struct buf *bp, int pageno, vm_page_t m)
 	vm_page_set_validclean(m, soff & PAGE_MASK, eoff - soff);
 }
 
+/*
+ * Similar to vfs_clean_one_page() but sets the bits to valid and dirty.
+ * The page data is assumed to be valid (there is no zeroing here).
+ */
+static void
+vfs_dirty_one_page(struct buf *bp, int pageno, vm_page_t m)
+{
+	int bcount;
+	int xoff;
+	int soff;
+	int eoff;
+
+	/*
+	 * Calculate offset range within the page but relative to buffer's
+	 * loffset.  loffset might be offset into the first page.
+	 */
+	xoff = (int)bp->b_loffset & PAGE_MASK;	/* loffset offset into pg 0 */
+	bcount = bp->b_bcount + xoff;		/* offset adjusted */
+
+	if (pageno == 0) {
+		soff = xoff;
+		eoff = PAGE_SIZE;
+	} else {
+		soff = (pageno << PAGE_SHIFT);
+		eoff = soff + PAGE_SIZE;
+	}
+	if (eoff > bcount)
+		eoff = bcount;
+	if (soff >= eoff)
+		return;
+	vm_page_set_validdirty(m, soff & PAGE_MASK, eoff - soff);
+}
+
 /*
  * vfs_bio_clrbuf:
  *
diff --git a/sys/kern/vfs_vopops.c b/sys/kern/vfs_vopops.c
index 918d8f3983..17fc7ebe0d 100644
--- a/sys/kern/vfs_vopops.c
+++ b/sys/kern/vfs_vopops.c
@@ -819,7 +819,8 @@ vop_strategy(struct vop_ops *ops, struct vnode *vp, struct bio *bio)
 	ap.a_bio = bio;
 
 	if (vp->v_mount) {
-		VFS_MPLOCK1(vp->v_mount);
+
+		VFS_MPLOCK_FLAG(vp->v_mount, MNTK_SG_MPSAFE);
 		DO_OPS(ops, error, &ap, vop_strategy);
 		VFS_MPUNLOCK(vp->v_mount);
 	} else {
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 0eb08f1dbf..f2815077c6 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -404,6 +404,7 @@ int	breadn (struct vnode *, off_t, int, off_t *, int *, int,
 		struct buf **);
 int	bwrite (struct buf *);
 void	bdwrite (struct buf *);
+void	buwrite (struct buf *);
 void	bawrite (struct buf *);
 void	bdirty (struct buf *);
 void	bheavy (struct buf *);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 241ad35e33..03f1e0e974 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1107,15 +1107,22 @@ swap_pager_strategy(vm_object_t object, struct bio *bio)
 
 	/*
 	 * Now initiate all the I/O.  Be careful looping on our chain as
 	 * I/O's may complete while we are still initiating them.
+	 *
+	 * If the request is a 100% sparse read no bios will be present
+	 * and we just biodone() the buffer.
 	 */
 	nbio->bio_caller_info2.cluster_tail = NULL;
 	bufx = nbio->bio_caller_info1.cluster_head;
-	while (bufx) {
-		biox = &bufx->b_bio1;
-		BUF_KERNPROC(bufx);
-		bufx = bufx->b_cluster_next;
-		vn_strategy(swapdev_vp, biox);
+	if (bufx) {
+		while (bufx) {
+			biox = &bufx->b_bio1;
+			BUF_KERNPROC(bufx);
+			bufx = bufx->b_cluster_next;
+			vn_strategy(swapdev_vp, biox);
+		}
+	} else {
+		biodone(bio);
 	}
 
 	/*
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index fcb2d49791..48401faa69 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1554,6 +1554,19 @@ vm_page_set_validclean(vm_page_t m, int base, int size)
 	}
 }
 
+/*
+ * Set valid & dirty.  Used by buwrite().
+ */
+void
+vm_page_set_validdirty(vm_page_t m, int base, int size)
+{
+	int pagebits;
+
+	pagebits = vm_page_bits(base, size);
+	m->valid |= pagebits;
+	m->dirty |= pagebits;
+}
+
 /*
  * Clear dirty bits.
  *
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d04f48c098..dbcd95a055 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -505,6 +505,7 @@ void vm_page_wire (vm_page_t);
 void vm_page_unqueue (vm_page_t);
 void vm_page_unqueue_nowakeup (vm_page_t);
 void vm_page_set_validclean (vm_page_t, int, int);
+void vm_page_set_validdirty (vm_page_t, int, int);
 void vm_page_set_valid (vm_page_t, int, int);
 void vm_page_set_dirty (vm_page_t, int, int);
 void vm_page_clear_dirty (vm_page_t, int, int);
-- 
2.41.0
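
For context, a rough sketch of how a VFS write path can sit on top of the
new buwrite().  This is illustrative only and is not part of the commit:
myfs_write(), BSIZE and BMASK are hypothetical names, and the real
tmpfs_write() arrives in a later tmpfs commit.  Only bread(), uiomove(),
brelse(), bwrite() and buwrite() are the existing kernel interfaces being
exercised here.

/*
 * Hypothetical write path using buwrite().  BSIZE/BMASK stand in for
 * the filesystem's logical block size; error handling is minimal.
 */
static int
myfs_write(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct buf *bp;
	off_t base_offset;
	size_t offset;
	size_t len;
	int error = 0;

	while (uio->uio_resid > 0) {
		/* Split the request at the containing block boundary. */
		offset = (size_t)uio->uio_offset & BMASK;
		base_offset = uio->uio_offset - offset;
		len = BSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;

		/* Bring the block into the buffer cache and copy in. */
		error = bread(vp, base_offset, BSIZE, &bp);
		if (error)
			break;
		error = uiomove((char *)bp->b_data + offset, len, uio);
		if (error) {
			brelse(bp);
			break;
		}

		if (ioflag & IO_SYNC) {
			/*
			 * A real flush must go through a real write path;
			 * per the comment above buwrite(), IO_SYNC and
			 * IO_ASYNC operations must not use the fake write.
			 */
			error = bwrite(bp);
		} else {
			/*
			 * Fake the write: the pages go back to the VM
			 * system valid and dirty, the buffer is released
			 * clean, and the dirty data is cached as VM pages
			 * rather than pinning buffer cache space.
			 */
			buwrite(bp);
		}
	}
	return (error);
}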
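
The masks touched by vm_page_set_validdirty() carry one bit per DEV_BSIZE
(512-byte) chunk of the page, computed by the kernel's vm_page_bits()
inline.  A small standalone illustration of that bit arithmetic follows;
the function is re-implemented here purely for demonstration and only
mirrors the kernel's inline:

#include <stdio.h>

#define DEV_BSHIFT	9	/* 512-byte chunks, 8 per 4K page */

/*
 * Demonstration mirror of the kernel's vm_page_bits() inline: build a
 * mask with one bit set per DEV_BSIZE chunk covered by [base, base+size).
 */
static int
vm_page_bits(int base, int size)
{
	int first_bit;
	int last_bit;

	if (size == 0)
		return (0);
	first_bit = base >> DEV_BSHIFT;
	last_bit = (base + size - 1) >> DEV_BSHIFT;
	return ((2 << last_bit) - (1 << first_bit));
}

int
main(void)
{
	/*
	 * Dirtying bytes 512..1535 of a page covers chunks 1 and 2,
	 * so m->valid and m->dirty would each gain the bits 0x06.
	 */
	printf("0x%02x\n", vm_page_bits(512, 1024));	/* prints 0x06 */
	return (0);
}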