kernel - Fix excessive mbuf use in nfs_realign()
[dragonfly.git] / sys / vfs / nfs / nfs_socket.c
index f7460d7..c241c3c 100644 (file)
@@ -195,13 +195,14 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
        struct sockaddr_in *sin;
        struct thread *td = &thread0; /* only used for socreate and sobind */
 
-       nmp->nm_so = NULL;
+       nmp->nm_so = so = NULL;
+       if (nmp->nm_flag & NFSMNT_FORCE)
+               return (EINVAL);
        saddr = nmp->nm_nam;
-       error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
+       error = socreate(saddr->sa_family, &so, nmp->nm_sotype,
                nmp->nm_soproto, td);
        if (error)
                goto bad;
-       so = nmp->nm_so;
        nmp->nm_soflags = so->so_proto->pr_flags;
 
        /*
@@ -328,10 +329,19 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
                nmp->nm_sdrtt[3] = 0;
        nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED;
        nmp->nm_timeouts = 0;
+
+       /*
+        * Assign nm_so last.  The moment nm_so is assigned the nfs_timer()
+        * can mess with the socket.
+        */
+       nmp->nm_so = so;
        return (0);
 
 bad:
-       nfs_disconnect(nmp);
+       if (so) {
+               soshutdown(so, SHUT_RDWR);
+               soclose(so, FNONBLOCK);
+       }
        return (error);
 }
 
@@ -351,9 +361,15 @@ nfs_reconnect(struct nfsmount *nmp, struct nfsreq *rep)
        int error;
 
        nfs_disconnect(nmp);
+       if (nmp->nm_rxstate >= NFSSVC_STOPPING)
+               return (EINTR);
        while ((error = nfs_connect(nmp, rep)) != 0) {
                if (error == EINTR || error == ERESTART)
                        return (EINTR);
+               if (error == EINVAL)
+                       return (error);
+               if (nmp->nm_rxstate >= NFSSVC_STOPPING)
+                       return (EINTR);
                (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
        }
 
@@ -631,8 +647,8 @@ tryagain:
                        if (error == 0 && sio.sb_cc != len) {
                            if (sio.sb_cc != 0)
                            log(LOG_INFO,
-                               "short receive (%d/%d) from nfs server %s\n",
-                               len - auio.uio_resid, len,
+                               "short receive (%zu/%d) from nfs server %s\n",
+                               (size_t)len - auio.uio_resid, len,
                                nmp->nm_mountp->mnt_stat.f_mntfromname);
                            error = EPIPE;
                        }
@@ -1224,7 +1240,7 @@ nfs_request_try(struct nfsreq *rep)
         * action possible is for r_mrep to be set (once we enqueue it).
         */
        if (rep->r_flags == 0xdeadc0de) {
-               print_backtrace();
+               print_backtrace(-1);
                panic("flags nbad\n");
        }
        KKASSERT((rep->r_flags & (R_LOCKED | R_ONREQQ)) == 0);
@@ -1244,6 +1260,12 @@ nfs_request_try(struct nfsreq *rep)
         */
        nfsstats.rpcrequests++;
 
+       if (nmp->nm_flag & NFSMNT_FORCE) {
+               rep->r_flags |= R_SOFTTERM;
+               rep->r_flags &= ~R_LOCKED;
+               return (0);
+       }
+
        /*
         * Chain request into list of outstanding requests. Be sure
         * to put it LAST so timer finds oldest requests first.  Note
@@ -1354,7 +1376,7 @@ nfs_request_waitreply(struct nfsreq *rep)
        rep->r_flags &= ~R_ONREQQ;
        --nmp->nm_reqqlen;
        if (TAILQ_FIRST(&nmp->nm_bioq) &&
-           nmp->nm_reqqlen == NFS_MAXASYNCBIO * 2 / 3) {
+           nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) {
                nfssvc_iod_writer_wakeup(nmp);
        }
        crit_exit();
@@ -1589,6 +1611,9 @@ nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp,
                         */
 #ifdef NFSKERB
                        XXX
+#else
+                       ktvout.tv_sec = 0;
+                       ktvout.tv_usec = 0;
 #endif
 
                        *tl++ = rpc_auth_kerb;
@@ -1947,7 +1972,7 @@ nfs_hardterm(struct nfsreq *rep, int islocked)
                        rep->r_info->state = NFSM_STATE_PROCESSREPLY;
                        nfssvc_iod_reader_wakeup(nmp);
                        if (TAILQ_FIRST(&nmp->nm_bioq) &&
-                           nmp->nm_reqqlen == NFS_MAXASYNCBIO * 2 / 3) {
+                           nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) {
                                nfssvc_iod_writer_wakeup(nmp);
                        }
                }
@@ -2128,49 +2153,48 @@ nfs_rcvunlock(struct nfsmount *nmp)
 }
 
 /*
- *     nfs_realign:
+ * nfs_realign:
  *
- *     Check for badly aligned mbuf data and realign by copying the unaligned
- *     portion of the data into a new mbuf chain and freeing the portions
- *     of the old chain that were replaced.
+ * Check for badly aligned mbuf data and realign by copying the unaligned
+ * portion of the data into a new mbuf chain and freeing the portions
+ * of the old chain that were replaced.
  *
- *     We cannot simply realign the data within the existing mbuf chain
- *     because the underlying buffers may contain other rpc commands and
- *     we cannot afford to overwrite them.
+ * We cannot simply realign the data within the existing mbuf chain
+ * because the underlying buffers may contain other rpc commands and
+ * we cannot afford to overwrite them.
  *
- *     We would prefer to avoid this situation entirely.  The situation does
- *     not occur with NFS/UDP and is supposed to only occassionally occur
- *     with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
+ * We would prefer to avoid this situation entirely.  The situation does
+ * not occur with NFS/UDP and is supposed to only occasionally occur
+ * with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
+ *
+ * NOTE!  MB_DONTWAIT cannot be used here.  The mbufs must be acquired
+ *       because the rpc request OR reply cannot be thrown away.  TCP NFS
+ *       mounts do not retry their RPCs unless the TCP connection itself
+ *       is dropped so throwing away an RPC will basically cause the NFS
+ *       operation to lock up indefinitely.
  */
 static void
 nfs_realign(struct mbuf **pm, int hsiz)
 {
        struct mbuf *m;
        struct mbuf *n = NULL;
-       int off = 0;
 
+       /*
+        * Check for misalignment
+        */
        ++nfs_realign_test;
-
        while ((m = *pm) != NULL) {
-               if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
-                       n = m_getl(m->m_len, MB_WAIT, MT_DATA, 0, NULL);
-                       n->m_len = 0;
+               if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3))
                        break;
-               }
                pm = &m->m_next;
        }
 
        /*
-        * If n is non-NULL, loop on m copying data, then replace the
-        * portion of the chain that had to be realigned.
+        * If misalignment found make a completely new copy.
         */
-       if (n != NULL) {
+       if (m) {
                ++nfs_realign_count;
-               while (m) {
-                       m_copyback(n, off, m->m_len, mtod(m, caddr_t));
-                       off += m->m_len;
-                       m = m->m_next;
-               }
+               n = m_dup_data(m, MB_WAIT);
                m_freem(*pm);
                *pm = n;
        }
@@ -2270,7 +2294,9 @@ nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
                bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
                nd->nd_cr.cr_ref = 1;
                nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+               nd->nd_cr.cr_ruid = nd->nd_cr.cr_svuid = nd->nd_cr.cr_uid;
                nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+               nd->nd_cr.cr_rgid = nd->nd_cr.cr_svgid = nd->nd_cr.cr_gid;
                len = fxdr_unsigned(int, *tl);
                if (len < 0 || len > RPCAUTH_UNIXGIDS) {
                        m_freem(info.mrep);
@@ -2374,6 +2400,9 @@ nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
                         */
 #ifdef NFSKERB
                        XXX
+#else
+                       tvout.tv_sec = 0;
+                       tvout.tv_usec = 0;
 #endif
 
                        tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);