mount{,d}(8): Use the pidfile(3) API for handling mountd's PID file.
[dragonfly.git] / sys / kern / vfs_journal.c
index 1e790b8..a8083d7 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/kern/vfs_journal.c,v 1.29 2006/10/27 04:56:31 dillon Exp $
+ * $DragonFly: src/sys/kern/vfs_journal.c,v 1.33 2007/05/09 00:53:34 dillon Exp $
  */
 /*
  * The journaling protocol is intended to evolve into a two-way stream
@@ -76,7 +76,7 @@
 #include <sys/journal.h>
 #include <sys/file.h>
 #include <sys/proc.h>
-#include <sys/msfbuf.h>
+#include <sys/xio.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 
@@ -90,6 +90,8 @@
 
 #include <sys/file2.h>
 #include <sys/thread2.h>
+#include <sys/mplock2.h>
+#include <sys/spinlock2.h>
 
 static void journal_wthread(void *info);
 static void journal_rthread(void *info);
@@ -105,6 +107,8 @@ static void journal_abort(struct journal *jo,
 static void journal_commit(struct journal *jo,
                         struct journal_rawrecbeg **rawpp,
                         int bytes, int closeout);
+static void jrecord_data(struct jrecord *jrec,
+                       void *buf, int bytes, int dtype);
 
 
 MALLOC_DEFINE(M_JOURNAL, "journal", "Journaling structures");
@@ -116,14 +120,16 @@ journal_create_threads(struct journal *jo)
        jo->flags &= ~(MC_JOURNAL_STOP_REQ | MC_JOURNAL_STOP_IMM);
        jo->flags |= MC_JOURNAL_WACTIVE;
        lwkt_create(journal_wthread, jo, NULL, &jo->wthread,
-                       TDF_STOPREQ, -1, "journal w:%.*s", JIDMAX, jo->id);
+                   TDF_NOSTART, -1,
+                   "journal w:%.*s", JIDMAX, jo->id);
        lwkt_setpri(&jo->wthread, TDPRI_KERN_DAEMON);
        lwkt_schedule(&jo->wthread);
 
        if (jo->flags & MC_JOURNAL_WANT_FULLDUPLEX) {
            jo->flags |= MC_JOURNAL_RACTIVE;
            lwkt_create(journal_rthread, jo, NULL, &jo->rthread,
-                       TDF_STOPREQ, -1, "journal r:%.*s", JIDMAX, jo->id);
+                       TDF_NOSTART, -1,
+                       "journal r:%.*s", JIDMAX, jo->id);
            lwkt_setpri(&jo->rthread, TDPRI_KERN_DAEMON);
            lwkt_schedule(&jo->rthread);
        }
@@ -140,7 +146,7 @@ journal_destroy_threads(struct journal *jo, int flags)
     while (jo->flags & (MC_JOURNAL_WACTIVE | MC_JOURNAL_RACTIVE)) {
        tsleep(jo, 0, "jwait", hz);
        if (++wcount % 10 == 0) {
-           printf("Warning: journal %s waiting for descriptors to close\n",
+           kprintf("Warning: journal %s waiting for descriptors to close\n",
                jo->id);
        }
     }
@@ -164,10 +170,13 @@ journal_wthread(void *info)
 {
     struct journal *jo = info;
     struct journal_rawrecbeg *rawp;
-    int bytes;
     int error;
-    int avail;
-    int res;
+    size_t avail;
+    size_t bytes;
+    size_t res;
+
+    /* not MPSAFE yet */
+    get_mplock();
 
     for (;;) {
        /*
@@ -254,10 +263,11 @@ journal_wthread(void *info)
        bytes = res;
        jo->fifo.rindex += bytes;
        error = fp_write(jo->fp, 
-                       jo->fifo.membase + ((jo->fifo.rindex - bytes) & jo->fifo.mask),
-                       bytes, &res);
+                       jo->fifo.membase +
+                        ((jo->fifo.rindex - bytes) & jo->fifo.mask),
+                       bytes, &res, UIO_SYSSPACE);
        if (error) {
-           printf("journal_thread(%s) write, error %d\n", jo->id, error);
+           kprintf("journal_thread(%s) write, error %d\n", jo->id, error);
            /* XXX */
        } else {
            KKASSERT(res == bytes);
@@ -284,6 +294,7 @@ journal_wthread(void *info)
     jo->flags &= ~MC_JOURNAL_WACTIVE;
     wakeup(jo);
     wakeup(&jo->fifo.windex);
+    rel_mplock();
 }
 
 /*
@@ -298,12 +309,15 @@ journal_rthread(void *info)
     struct journal *jo = info;
     int64_t transid;
     int error;
-    int count;
-    int bytes;
+    size_t count;
+    size_t bytes;
 
     transid = 0;
     error = 0;
 
+    /* not MPSAFE yet */
+    get_mplock();
+
     for (;;) {
        /*
         * We have been asked to stop
@@ -316,20 +330,21 @@ journal_rthread(void *info)
         * stream.
         */
        if (transid == 0) {
-           error = fp_read(jo->fp, &ack, sizeof(ack), &count, 1);
+           error = fp_read(jo->fp, &ack, sizeof(ack), &count, 
+                           1, UIO_SYSSPACE);
 #if 0
-           printf("fp_read ack error %d count %d\n", error, count);
+           kprintf("fp_read ack error %d count %d\n", error, count);
 #endif
            if (error || count != sizeof(ack))
                break;
            if (error) {
-               printf("read error %d on receive stream\n", error);
+               kprintf("read error %d on receive stream\n", error);
                break;
            }
            if (ack.rbeg.begmagic != JREC_BEGMAGIC ||
                ack.rend.endmagic != JREC_ENDMAGIC
            ) {
-               printf("bad begmagic or endmagic on receive stream\n");
+               kprintf("bad begmagic or endmagic on receive stream\n");
                break;
            }
            transid = ack.rbeg.transid;
@@ -344,7 +359,8 @@ journal_rthread(void *info)
        bytes = jo->fifo.rindex - jo->fifo.xindex;
 
        if (bytes == 0) {
-           printf("warning: unsent data acknowledged transid %08llx\n", transid);
+           kprintf("warning: unsent data acknowledged transid %08llx\n",
+                   (long long)transid);
            tsleep(&jo->fifo.xindex, 0, "jrseq", hz);
            transid = 0;
            continue;
@@ -363,7 +379,9 @@ journal_rthread(void *info)
         */
        if (rawp->transid < transid) {
 #if 1
-           printf("ackskip %08llx/%08llx\n", rawp->transid, transid);
+           kprintf("ackskip %08llx/%08llx\n",
+                   (long long)rawp->transid,
+                   (long long)transid);
 #endif
            jo->fifo.xindex += (rawp->recsize + 15) & ~15;
            jo->total_acked += (rawp->recsize + 15) & ~15;
@@ -375,7 +393,9 @@ journal_rthread(void *info)
        }
        if (rawp->transid == transid) {
 #if 1
-           printf("ackskip %08llx/%08llx\n", rawp->transid, transid);
+           kprintf("ackskip %08llx/%08llx\n",
+                   (long long)rawp->transid,
+                   (long long)transid);
 #endif
            jo->fifo.xindex += (rawp->recsize + 15) & ~15;
            jo->total_acked += (rawp->recsize + 15) & ~15;
@@ -386,12 +406,14 @@ journal_rthread(void *info)
            transid = 0;
            continue;
        }
-       printf("warning: unsent data(2) acknowledged transid %08llx\n", transid);
+       kprintf("warning: unsent data(2) acknowledged transid %08llx\n",
+               (long long)transid);
        transid = 0;
     }
     jo->flags &= ~MC_JOURNAL_RACTIVE;
     wakeup(jo);
     wakeup(&jo->fifo.windex);
+    rel_mplock();
 }
 
 /*
@@ -930,7 +952,37 @@ void
 jrecord_leaf(struct jrecord *jrec, int16_t rectype, void *ptr, int bytes)
 {
     jrecord_write(jrec, rectype, bytes);
-    jrecord_data(jrec, ptr, bytes);
+    jrecord_data(jrec, ptr, bytes, JDATA_KERN);
+}
+
+void
+jrecord_leaf_uio(struct jrecord *jrec, int16_t rectype,
+                struct uio *uio)
+{
+    int seg;
+
+    /* Emit one leaf record per non-empty iovec of the uio. */
+    for (seg = 0; seg < uio->uio_iovcnt; ++seg) {
+       struct iovec *vec = &uio->uio_iov[seg];
+       int dtype;
+
+       if (vec->iov_len == 0)
+           continue;
+
+       /* UIO_SYSSPACE is tagged JDATA_KERN; any other segflg JDATA_USER. */
+       dtype = (uio->uio_segflg == UIO_SYSSPACE) ? JDATA_KERN : JDATA_USER;
+       jrecord_write(jrec, rectype, vec->iov_len);
+       jrecord_data(jrec, vec->iov_base, vec->iov_len, dtype);
+    }
+}
+
+void
+jrecord_leaf_xio(struct jrecord *jrec, int16_t rectype, xio_t xio)
+{
+    int nbytes = PAGE_SIZE * xio->xio_npages;  /* length in whole pages */
+
+    jrecord_write(jrec, rectype, nbytes);
+    jrecord_data(jrec, xio, nbytes, JDATA_XIO);
 }
 
 /*
@@ -1019,11 +1071,12 @@ jrecord_write(struct jrecord *jrec, int16_t rectype, int bytes)
  * being pushed out.   Callers should be aware that even the associated
  * subrecord header may become inaccessible due to stream record pushouts.
  */
-void
-jrecord_data(struct jrecord *jrec, const void *buf, int bytes)
+static void
+jrecord_data(struct jrecord *jrec, void *buf, int bytes, int dtype)
 {
     int pusheditout;
     int extsize;
+    int xio_offset = 0;
 
     KKASSERT(bytes >= 0 && bytes <= jrec->residual);
 
@@ -1035,8 +1088,21 @@ jrecord_data(struct jrecord *jrec, const void *buf, int bytes)
        /*
         * Fill in any remaining space in the current stream record.
         */
-       bcopy(buf, jrec->stream_ptr, jrec->stream_residual);
-       buf = (const char *)buf + jrec->stream_residual;
+       switch (dtype) {
+       case JDATA_KERN:
+           bcopy(buf, jrec->stream_ptr, jrec->stream_residual);
+           break;
+       case JDATA_USER:
+           copyin(buf, jrec->stream_ptr, jrec->stream_residual);
+           break;
+       case JDATA_XIO:
+           xio_copy_xtok((xio_t)buf, xio_offset, jrec->stream_ptr,
+                         jrec->stream_residual);
+           xio_offset += jrec->stream_residual;
+           break;
+       }
+       if (dtype != JDATA_XIO)
+           buf = (char *)buf + jrec->stream_residual;
        bytes -= jrec->stream_residual;
        /*jrec->stream_ptr += jrec->stream_residual;*/
        jrec->residual -= jrec->stream_residual;
@@ -1069,7 +1135,17 @@ jrecord_data(struct jrecord *jrec, const void *buf, int bytes)
      * Push out any remaining bytes into the current stream record.
      */
     if (bytes) {
-       bcopy(buf, jrec->stream_ptr, bytes);
+       switch (dtype) {
+       case JDATA_KERN:
+           bcopy(buf, jrec->stream_ptr, bytes);
+           break;
+       case JDATA_USER:
+           copyin(buf, jrec->stream_ptr, bytes);
+           break;
+       case JDATA_XIO:
+           xio_copy_xtok((xio_t)buf, xio_offset, jrec->stream_ptr, bytes);
+           break;
+       }
        jrec->stream_ptr += bytes;
        jrec->stream_residual -= bytes;
        jrec->residual -= bytes;
@@ -1159,7 +1235,8 @@ jrecord_write_path(struct jrecord *jrec, int16_t rectype, struct namecache *ncp)
 again:
     pathlen = 0;
     for (scan = ncp; scan; scan = scan->nc_parent) {
-       pathlen += scan->nc_nlen + 1;
+       if (scan->nc_nlen > 0)
+           pathlen += scan->nc_nlen + 1;
     }
 
     if (pathlen <= sizeof(buf))
@@ -1172,6 +1249,8 @@ again:
      */
     index = pathlen;
     for (scan = ncp; scan; scan = scan->nc_parent) {
+       if (scan->nc_nlen == 0)
+           continue;
        if (scan->nc_nlen >= index) {
            if (base != buf)
                kfree(base, M_TEMP);
@@ -1225,8 +1304,12 @@ jrecord_write_vattr(struct jrecord *jrec, struct vattr *vat)
        jrecord_leaf(jrec, JLEAF_GEN, &vat->va_gen, sizeof(vat->va_gen));
     if (vat->va_flags != VNOVAL)
        jrecord_leaf(jrec, JLEAF_FLAGS, &vat->va_flags, sizeof(vat->va_flags));
-    if (vat->va_rdev != VNOVAL)
-       jrecord_leaf(jrec, JLEAF_UDEV, &vat->va_rdev, sizeof(vat->va_rdev));
+    if (vat->va_rmajor != VNOVAL) {
+       udev_t rdev = makeudev(vat->va_rmajor, vat->va_rminor);
+       jrecord_leaf(jrec, JLEAF_UDEV, &rdev, sizeof(rdev));
+       jrecord_leaf(jrec, JLEAF_UMAJOR, &vat->va_rmajor, sizeof(vat->va_rmajor));
+       jrecord_leaf(jrec, JLEAF_UMINOR, &vat->va_rminor, sizeof(vat->va_rminor));
+    }
 #if 0
     if (vat->va_filerev != VNOVAL)
        jrecord_leaf(jrec, JLEAF_FILEREV, &vat->va_filerev, sizeof(vat->va_filerev));
@@ -1268,30 +1351,46 @@ jrecord_write_cred(struct jrecord *jrec, struct thread *td, struct ucred *cred)
 void
 jrecord_write_vnode_ref(struct jrecord *jrec, struct vnode *vp)
 {
-    struct namecache *ncp;
+    struct nchandle nch;  /* writes a JLEAF_PATH_REF path for the first usable namecache entry of vp */
 
-    TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
-       if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+    nch.mount = vp->v_mount;
+    spin_lock(&vp->v_spin);  /* held for the whole v_namecache scan */
+    TAILQ_FOREACH(nch.ncp, &vp->v_namecache, nc_vnode) {
+       if ((nch.ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)  /* neither flag set: use this entry */
            break;
     }
-    if (ncp)
-       jrecord_write_path(jrec, JLEAF_PATH_REF, ncp);
+    if (nch.ncp) {
+       cache_hold(&nch);  /* hold is taken before v_spin is released; presumably keeps the entry alive - TODO confirm */
+       spin_unlock(&vp->v_spin);
+       jrecord_write_path(jrec, JLEAF_PATH_REF, nch.ncp);  /* runs with v_spin released */
+       cache_drop(&nch);
+    } else {
+       spin_unlock(&vp->v_spin);  /* scan found no usable entry: nothing is written */
+    }
 }
 
 void
 jrecord_write_vnode_link(struct jrecord *jrec, struct vnode *vp, 
                          struct namecache *notncp)
 {
-    struct namecache *ncp;
+    struct nchandle nch;  /* writes a path for the first usable namecache entry of vp other than notncp */
 
-    TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
-       if (ncp == notncp)
+    nch.mount = vp->v_mount;
+    spin_lock(&vp->v_spin);  /* held for the whole v_namecache scan */
+    TAILQ_FOREACH(nch.ncp, &vp->v_namecache, nc_vnode) {
+       if (nch.ncp == notncp)  /* skip the entry the caller excluded */
            continue;
-       if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+       if ((nch.ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)  /* neither flag set: use this entry */
            break;
     }
-    if (ncp)
-       jrecord_write_path(jrec, JLEAF_PATH_REF, ncp);
+    if (nch.ncp) {
+       cache_hold(&nch);  /* hold is taken before v_spin is released; presumably keeps the entry alive - TODO confirm */
+       spin_unlock(&vp->v_spin);
+       jrecord_write_path(jrec, JLEAF_PATH_REF, nch.ncp);  /* NOTE(review): same JLEAF_PATH_REF rectype as jrecord_write_vnode_ref - confirm intended */
+       cache_drop(&nch);
+    } else {
+       spin_unlock(&vp->v_spin);  /* scan found no usable entry: nothing is written */
+    }
 }
 
 /*
@@ -1302,12 +1401,13 @@ jrecord_write_pagelist(struct jrecord *jrec, int16_t rectype,
                        struct vm_page **pglist, int *rtvals, int pgcount,
                        off_t offset)
 {
-    struct msf_buf *msf;
+    struct xio xio;
     int error;
     int b;
     int i;
 
     i = 0;
+    xio_init(&xio);
     while (i < pgcount) {
        /*
         * Find the next valid section.  Skip any invalid elements
@@ -1333,16 +1433,14 @@ jrecord_write_pagelist(struct jrecord *jrec, int16_t rectype,
         * And write it out.
         */
        if (i - b) {
-           error = msf_map_pagelist(&msf, pglist + b, i - b, 0);
+           error = xio_init_pages(&xio, pglist + b, i - b, XIOF_READ);
            if (error == 0) {
-               printf("RECORD PUTPAGES %d\n", msf_buf_bytes(msf));
                jrecord_leaf(jrec, JLEAF_SEEKPOS, &offset, sizeof(offset));
-               jrecord_leaf(jrec, rectype, 
-                            msf_buf_kva(msf), msf_buf_bytes(msf));
-               msf_buf_free(msf);
+               jrecord_leaf_xio(jrec, rectype, &xio);
            } else {
-               printf("jrecord_write_pagelist: mapping failure\n");
+               kprintf("jrecord_write_pagelist: xio init failure\n");
            }
+           xio_release(&xio);
            offset += (off_t)(i - b) << PAGE_SHIFT;
        }
     }
@@ -1351,37 +1449,16 @@ jrecord_write_pagelist(struct jrecord *jrec, int16_t rectype,
 /*
  * Write out the data represented by a UIO.
  */
-struct jwuio_info {
-    struct jrecord *jrec;
-    int16_t rectype;
-};
-
-static int jrecord_write_uio_callback(void *info, char *buf, int bytes);
-
 void
 jrecord_write_uio(struct jrecord *jrec, int16_t rectype, struct uio *uio)
 {
-    struct jwuio_info info = { jrec, rectype };
-    int error;
-
     if (uio->uio_segflg != UIO_NOCOPY) {
        jrecord_leaf(jrec, JLEAF_SEEKPOS, &uio->uio_offset, 
                     sizeof(uio->uio_offset));
-       error = msf_uio_iterate(uio, jrecord_write_uio_callback, &info);
-       if (error)
-           printf("XXX warning uio iterate failed %d\n", error);
+       jrecord_leaf_uio(jrec, rectype, uio);  /* after the SEEKPOS leaf: one rectype leaf per non-empty iovec; returns void, so no error is reported here */
     }
 }
 
-static int
-jrecord_write_uio_callback(void *info_arg, char *buf, int bytes)
-{
-    struct jwuio_info *info = info_arg;
-
-    jrecord_leaf(info->jrec, info->rectype, buf, bytes);
-    return(0);
-}
-
 void
 jrecord_file_data(struct jrecord *jrec, struct vnode *vp, 
                  off_t off, off_t bytes)