* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $DragonFly: src/sys/kern/vfs_journal.c,v 1.30 2006/12/23 00:35:04 swildner Exp $
+ * $DragonFly: src/sys/kern/vfs_journal.c,v 1.33 2007/05/09 00:53:34 dillon Exp $
*/
/*
* The journaling protocol is intended to evolve into a two-way stream
#include <sys/journal.h>
#include <sys/file.h>
#include <sys/proc.h>
-#include <sys/msfbuf.h>
+#include <sys/xio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/file2.h>
#include <sys/thread2.h>
+#include <sys/mplock2.h>
+#include <sys/spinlock2.h>
/*
 * File-local forward declarations.  journal_wthread() and
 * journal_rthread() are the entry points handed to lwkt_create() with
 * the struct journal as their argument.
 */
static void journal_wthread(void *info);
static void journal_rthread(void *info);
static void journal_commit(struct journal *jo,
struct journal_rawrecbeg **rawpp,
int bytes, int closeout);
/*
 * jrecord_data() is private to this file.  Its dtype argument selects
 * how buf is read: JDATA_KERN (bcopy from buf), JDATA_USER (copyin from
 * buf) or JDATA_XIO (buf is really an xio_t, read with xio_copy_xtok()).
 */
+static void jrecord_data(struct jrecord *jrec,
+ void *buf, int bytes, int dtype);
MALLOC_DEFINE(M_JOURNAL, "journal", "Journaling structures");
jo->flags &= ~(MC_JOURNAL_STOP_REQ | MC_JOURNAL_STOP_IMM);
jo->flags |= MC_JOURNAL_WACTIVE;
lwkt_create(journal_wthread, jo, NULL, &jo->wthread,
- TDF_STOPREQ, -1, "journal w:%.*s", JIDMAX, jo->id);
+ TDF_NOSTART, -1,
+ "journal w:%.*s", JIDMAX, jo->id);
lwkt_setpri(&jo->wthread, TDPRI_KERN_DAEMON);
lwkt_schedule(&jo->wthread);
if (jo->flags & MC_JOURNAL_WANT_FULLDUPLEX) {
jo->flags |= MC_JOURNAL_RACTIVE;
lwkt_create(journal_rthread, jo, NULL, &jo->rthread,
- TDF_STOPREQ, -1, "journal r:%.*s", JIDMAX, jo->id);
+ TDF_NOSTART, -1,
+ "journal r:%.*s", JIDMAX, jo->id);
lwkt_setpri(&jo->rthread, TDPRI_KERN_DAEMON);
lwkt_schedule(&jo->rthread);
}
{
struct journal *jo = info;
struct journal_rawrecbeg *rawp;
- int bytes;
int error;
- int avail;
- int res;
+ size_t avail;
+ size_t bytes;
+ size_t res;
+
+ /* not MPSAFE yet */
+ get_mplock();
for (;;) {
/*
bytes = res;
jo->fifo.rindex += bytes;
error = fp_write(jo->fp,
- jo->fifo.membase + ((jo->fifo.rindex - bytes) & jo->fifo.mask),
- bytes, &res);
+ jo->fifo.membase +
+ ((jo->fifo.rindex - bytes) & jo->fifo.mask),
+ bytes, &res, UIO_SYSSPACE);
if (error) {
kprintf("journal_thread(%s) write, error %d\n", jo->id, error);
/* XXX */
jo->flags &= ~MC_JOURNAL_WACTIVE;
wakeup(jo);
wakeup(&jo->fifo.windex);
+ rel_mplock();
}
/*
struct journal *jo = info;
int64_t transid;
int error;
- int count;
- int bytes;
+ size_t count;
+ size_t bytes;
transid = 0;
error = 0;
+ /* not MPSAFE yet */
+ get_mplock();
+
for (;;) {
/*
* We have been asked to stop
* stream.
*/
if (transid == 0) {
- error = fp_read(jo->fp, &ack, sizeof(ack), &count, 1);
+ error = fp_read(jo->fp, &ack, sizeof(ack), &count,
+ 1, UIO_SYSSPACE);
#if 0
kprintf("fp_read ack error %d count %d\n", error, count);
#endif
bytes = jo->fifo.rindex - jo->fifo.xindex;
if (bytes == 0) {
- kprintf("warning: unsent data acknowledged transid %08llx\n", transid);
+ kprintf("warning: unsent data acknowledged transid %08llx\n",
+ (long long)transid);
tsleep(&jo->fifo.xindex, 0, "jrseq", hz);
transid = 0;
continue;
*/
if (rawp->transid < transid) {
#if 1
- kprintf("ackskip %08llx/%08llx\n", rawp->transid, transid);
+ kprintf("ackskip %08llx/%08llx\n",
+ (long long)rawp->transid,
+ (long long)transid);
#endif
jo->fifo.xindex += (rawp->recsize + 15) & ~15;
jo->total_acked += (rawp->recsize + 15) & ~15;
}
if (rawp->transid == transid) {
#if 1
- kprintf("ackskip %08llx/%08llx\n", rawp->transid, transid);
+ kprintf("ackskip %08llx/%08llx\n",
+ (long long)rawp->transid,
+ (long long)transid);
#endif
jo->fifo.xindex += (rawp->recsize + 15) & ~15;
jo->total_acked += (rawp->recsize + 15) & ~15;
transid = 0;
continue;
}
- kprintf("warning: unsent data(2) acknowledged transid %08llx\n", transid);
+ kprintf("warning: unsent data(2) acknowledged transid %08llx\n",
+ (long long)transid);
transid = 0;
}
jo->flags &= ~MC_JOURNAL_RACTIVE;
wakeup(jo);
wakeup(&jo->fifo.windex);
+ rel_mplock();
}
/*
jrecord_leaf(struct jrecord *jrec, int16_t rectype, void *ptr, int bytes)
{
jrecord_write(jrec, rectype, bytes);
- jrecord_data(jrec, ptr, bytes);
+ jrecord_data(jrec, ptr, bytes, JDATA_KERN);
+}
+
/*
 * Write the data described by a uio as leaf records of type rectype.
 *
 * One leaf is emitted per iovec: jrecord_write() lays down the leaf
 * header sized to iov_len and jrecord_data() copies the payload.  The
 * iovecs are only read; nothing in the uio is modified here.
 *
 * uio_segflg picks the copy method: UIO_SYSSPACE uses JDATA_KERN, and
 * every other value falls into the else arm and is copied as JDATA_USER.
 *
 * NOTE(review): jrecord_data() takes an int byte count and returns
 * void, and its JDATA_USER path does not check the result of copyin(),
 * so a faulting user address cannot be reported back to this caller.
 */
+void
+jrecord_leaf_uio(struct jrecord *jrec, int16_t rectype,
+ struct uio *uio)
+{
+ struct iovec *iov;
+ int i;
+
+ for (i = 0; i < uio->uio_iovcnt; ++i) {
+ iov = &uio->uio_iov[i];
+ if (iov->iov_len == 0)
+ continue; /* empty segments produce no leaf at all */
+ if (uio->uio_segflg == UIO_SYSSPACE) {
+ jrecord_write(jrec, rectype, iov->iov_len);
+ jrecord_data(jrec, iov->iov_base, iov->iov_len, JDATA_KERN);
+ } else { /* UIO_USERSPACE */
+ jrecord_write(jrec, rectype, iov->iov_len); /* same header as the kernel-space arm; only the copy method differs */
+ jrecord_data(jrec, iov->iov_base, iov->iov_len, JDATA_USER);
+ }
+ }
+}
+
/*
 * Write the pages held by an XIO as a single leaf record of type
 * rectype.
 *
 * The xio pointer itself is passed as the data buffer; with JDATA_XIO
 * jrecord_data() casts it back to xio_t and copies out of it with
 * xio_copy_xtok().
 *
 * NOTE(review): the leaf length is computed from the page count alone
 * (xio_npages * PAGE_SIZE).  Whether the xio can describe a partial
 * first/last page that should shorten this length is not visible
 * here -- confirm against the xio definition.
 */
+void
+jrecord_leaf_xio(struct jrecord *jrec, int16_t rectype, xio_t xio)
+{
+ int bytes = xio->xio_npages * PAGE_SIZE;
+
+ jrecord_write(jrec, rectype, bytes);
+ jrecord_data(jrec, xio, bytes, JDATA_XIO);
}
/*
* being pushed out. Callers should be aware that even the associated
* subrecord header may become inaccessible due to stream record pushouts.
*/
-void
-jrecord_data(struct jrecord *jrec, const void *buf, int bytes)
+static void
+jrecord_data(struct jrecord *jrec, void *buf, int bytes, int dtype)
{
int pusheditout;
int extsize;
+ int xio_offset = 0;
KKASSERT(bytes >= 0 && bytes <= jrec->residual);
/*
* Fill in any remaining space in the current stream record.
*/
- bcopy(buf, jrec->stream_ptr, jrec->stream_residual);
- buf = (const char *)buf + jrec->stream_residual;
+ switch (dtype) {
+ case JDATA_KERN:
+ bcopy(buf, jrec->stream_ptr, jrec->stream_residual);
+ break;
+ case JDATA_USER:
+ copyin(buf, jrec->stream_ptr, jrec->stream_residual);
+ break;
+ case JDATA_XIO:
+ xio_copy_xtok((xio_t)buf, xio_offset, jrec->stream_ptr,
+ jrec->stream_residual);
+ xio_offset += jrec->stream_residual;
+ break;
+ }
+ if (dtype != JDATA_XIO)
+ buf = (char *)buf + jrec->stream_residual;
bytes -= jrec->stream_residual;
/*jrec->stream_ptr += jrec->stream_residual;*/
jrec->residual -= jrec->stream_residual;
* Push out any remaining bytes into the current stream record.
*/
if (bytes) {
- bcopy(buf, jrec->stream_ptr, bytes);
+ switch (dtype) {
+ case JDATA_KERN:
+ bcopy(buf, jrec->stream_ptr, bytes);
+ break;
+ case JDATA_USER:
+ copyin(buf, jrec->stream_ptr, bytes);
+ break;
+ case JDATA_XIO:
+ xio_copy_xtok((xio_t)buf, xio_offset, jrec->stream_ptr, bytes);
+ break;
+ }
jrec->stream_ptr += bytes;
jrec->stream_residual -= bytes;
jrec->residual -= bytes;
again:
pathlen = 0;
for (scan = ncp; scan; scan = scan->nc_parent) {
- pathlen += scan->nc_nlen + 1;
+ if (scan->nc_nlen > 0)
+ pathlen += scan->nc_nlen + 1;
}
if (pathlen <= sizeof(buf))
*/
index = pathlen;
for (scan = ncp; scan; scan = scan->nc_parent) {
+ if (scan->nc_nlen == 0)
+ continue;
if (scan->nc_nlen >= index) {
if (base != buf)
kfree(base, M_TEMP);
jrecord_leaf(jrec, JLEAF_GEN, &vat->va_gen, sizeof(vat->va_gen));
if (vat->va_flags != VNOVAL)
jrecord_leaf(jrec, JLEAF_FLAGS, &vat->va_flags, sizeof(vat->va_flags));
- if (vat->va_rdev != VNOVAL)
- jrecord_leaf(jrec, JLEAF_UDEV, &vat->va_rdev, sizeof(vat->va_rdev));
+ if (vat->va_rmajor != VNOVAL) {
+ udev_t rdev = makeudev(vat->va_rmajor, vat->va_rminor);
+ jrecord_leaf(jrec, JLEAF_UDEV, &rdev, sizeof(rdev));
+ jrecord_leaf(jrec, JLEAF_UMAJOR, &vat->va_rmajor, sizeof(vat->va_rmajor));
+ jrecord_leaf(jrec, JLEAF_UMINOR, &vat->va_rminor, sizeof(vat->va_rminor));
+ }
#if 0
if (vat->va_filerev != VNOVAL)
jrecord_leaf(jrec, JLEAF_FILEREV, &vat->va_filerev, sizeof(vat->va_filerev));
/*
 * Write a JLEAF_PATH_REF path record for a vnode.
 *
 * The vnode's v_namecache list is scanned for the first entry that has
 * neither NCF_UNRESOLVED nor NCF_DESTROYED set, and that entry is handed
 * to jrecord_write_path().  If no entry qualifies, nothing is written.
 *
 * The scan runs with vp->v_spin held.  A matching entry is referenced
 * with cache_hold() before the spinlock is released and dropped with
 * cache_drop() once the path has been written -- presumably so the
 * entry cannot go away while jrecord_write_path() walks it without the
 * spinlock.
 *
 * The loop variable is tested after the loop, so the code relies on
 * TAILQ_FOREACH leaving it NULL when the list is exhausted.
 */
void
jrecord_write_vnode_ref(struct jrecord *jrec, struct vnode *vp)
{
- struct namecache *ncp;
+ struct nchandle nch;
- TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
- if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+ nch.mount = vp->v_mount;
+ spin_lock(&vp->v_spin);
+ TAILQ_FOREACH(nch.ncp, &vp->v_namecache, nc_vnode) {
+ if ((nch.ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
break;
}
- if (ncp)
- jrecord_write_path(jrec, JLEAF_PATH_REF, ncp);
+ if (nch.ncp) {
+ cache_hold(&nch); /* take the reference while v_spin is still held */
+ spin_unlock(&vp->v_spin);
+ jrecord_write_path(jrec, JLEAF_PATH_REF, nch.ncp);
+ cache_drop(&nch);
+ } else {
+ spin_unlock(&vp->v_spin); /* no usable entry: just release the lock */
+ }
}
/*
 * Write a path record for a vnode using a namecache entry other than
 * notncp.
 *
 * The vnode's v_namecache list is scanned for the first entry that is
 * not notncp and has neither NCF_UNRESOLVED nor NCF_DESTROYED set.  That
 * entry is handed to jrecord_write_path(); if none qualifies, nothing is
 * written.  Note that the leaf type emitted is JLEAF_PATH_REF, the same
 * type jrecord_write_vnode_ref() uses.
 *
 * The scan runs with vp->v_spin held.  A matching entry is referenced
 * with cache_hold() before the spinlock is released and dropped with
 * cache_drop() after the path has been written.
 */
void
jrecord_write_vnode_link(struct jrecord *jrec, struct vnode *vp,
struct namecache *notncp)
{
- struct namecache *ncp;
+ struct nchandle nch;
- TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
- if (ncp == notncp)
+ nch.mount = vp->v_mount;
+ spin_lock(&vp->v_spin);
+ TAILQ_FOREACH(nch.ncp, &vp->v_namecache, nc_vnode) {
+ if (nch.ncp == notncp)
continue;
- if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+ if ((nch.ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
break;
}
- if (ncp)
- jrecord_write_path(jrec, JLEAF_PATH_REF, ncp);
+ if (nch.ncp) {
+ cache_hold(&nch); /* take the reference while v_spin is still held */
+ spin_unlock(&vp->v_spin);
+ jrecord_write_path(jrec, JLEAF_PATH_REF, nch.ncp);
+ cache_drop(&nch);
+ } else {
+ spin_unlock(&vp->v_spin); /* no usable entry: just release the lock */
+ }
}
/*
struct vm_page **pglist, int *rtvals, int pgcount,
off_t offset)
{
- struct msf_buf *msf;
+ struct xio xio;
int error;
int b;
int i;
i = 0;
+ xio_init(&xio);
while (i < pgcount) {
/*
* Find the next valid section. Skip any invalid elements
* And write it out.
*/
if (i - b) {
- error = msf_map_pagelist(&msf, pglist + b, i - b, 0);
+ error = xio_init_pages(&xio, pglist + b, i - b, XIOF_READ);
if (error == 0) {
- kprintf("RECORD PUTPAGES %d\n", msf_buf_bytes(msf));
jrecord_leaf(jrec, JLEAF_SEEKPOS, &offset, sizeof(offset));
- jrecord_leaf(jrec, rectype,
- msf_buf_kva(msf), msf_buf_bytes(msf));
- msf_buf_free(msf);
+ jrecord_leaf_xio(jrec, rectype, &xio);
} else {
- kprintf("jrecord_write_pagelist: mapping failure\n");
+ kprintf("jrecord_write_pagelist: xio init failure\n");
}
+ xio_release(&xio);
offset += (off_t)(i - b) << PAGE_SHIFT;
}
}
/*
* Write out the data represented by a UIO.
*/
-struct jwuio_info {
- struct jrecord *jrec;
- int16_t rectype;
-};
-
-static int jrecord_write_uio_callback(void *info, char *buf, int bytes);
-
/*
 * Record the data described by a uio: first a JLEAF_SEEKPOS leaf
 * holding uio->uio_offset, then the payload as leaves of type rectype
 * via jrecord_leaf_uio().
 *
 * A uio whose uio_segflg is UIO_NOCOPY is skipped entirely; not even
 * the seek position is recorded for it.
 */
void
jrecord_write_uio(struct jrecord *jrec, int16_t rectype, struct uio *uio)
{
- struct jwuio_info info = { jrec, rectype };
- int error;
-
if (uio->uio_segflg != UIO_NOCOPY) {
jrecord_leaf(jrec, JLEAF_SEEKPOS, &uio->uio_offset,
sizeof(uio->uio_offset));
- error = msf_uio_iterate(uio, jrecord_write_uio_callback, &info);
- if (error)
- kprintf("XXX warning uio iterate failed %d\n", error);
+ jrecord_leaf_uio(jrec, rectype, uio); /* returns void: unlike the removed msf_uio_iterate() path, no error is reported here */
}
}
-static int
-jrecord_write_uio_callback(void *info_arg, char *buf, int bytes)
-{
- struct jwuio_info *info = info_arg;
-
- jrecord_leaf(info->jrec, info->rectype, buf, bytes);
- return(0);
-}
-
void
jrecord_file_data(struct jrecord *jrec, struct vnode *vp,
off_t off, off_t bytes)