Merge branch 'devel'
author Sepherosa Ziehau <sephe@dragonflybsd.org>
Mon, 24 Oct 2011 12:10:07 +0000 (20:10 +0800)
committer Sepherosa Ziehau <sephe@dragonflybsd.org>
Mon, 24 Oct 2011 12:10:07 +0000 (20:10 +0800)
sys/kern/uipc_msg.c
sys/kern/uipc_socket.c
sys/net/netmsg.h
sys/netinet/tcp_input.c
sys/netinet/tcp_usrreq.c
sys/sys/mbuf.h
sys/sys/socketops.h
sys/sys/socketvar.h

index b2670ce..a81285f 100644 (file)
@@ -44,6 +44,7 @@
 #include <sys/thread.h>
 #include <sys/thread2.h>
 #include <sys/msgport2.h>
+#include <sys/mbuf.h>
 #include <vm/pmap.h>
 #include <net/netmsg2.h>
 
@@ -316,6 +317,23 @@ so_pru_send(struct socket *so, int flags, struct mbuf *m,
        return (error);
 }
 
+void
+so_pru_send_async(struct socket *so, int flags, struct mbuf *m,
+           struct sockaddr *addr, struct mbuf *control, struct thread *td)
+{
+       struct netmsg_pru_send *msg;
+       /* Queue a pru_send message on so->so_port; nothing is returned. */
+       msg = &m->m_hdr.mh_sndmsg;      /* message storage is inside m's header */
+       netmsg_init(&msg->base, so, &netisr_apanic_rport,
+                   0, so->so_proto->pr_usrreqs->pru_send);
+       msg->nm_flags = flags | PRUS_NOREPLY;   /* PRUS_NOREPLY is always added */
+       msg->nm_m = m;
+       msg->nm_addr = addr;
+       msg->nm_control = control;
+       msg->nm_td = td;
+       lwkt_sendmsg(so->so_port, &msg->base.lmsg);     /* result is not examined */
+}
+
 int
 so_pru_sense(struct socket *so, struct stat *sb)
 {
index 868aac7..936158d 100644 (file)
@@ -97,6 +97,8 @@
 
 #include <machine/limits.h>
 
+extern int tcp_sosnd_agglim;
+
 #ifdef INET
 static int      do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
 #endif /* INET */
@@ -820,6 +822,170 @@ out:
        return (error);
 }
 
+int
+sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio,
+       struct mbuf *top, struct mbuf *control, int flags,
+       struct thread *td)
+{
+       struct mbuf **mp;       /* link where the next mbuf gets attached */
+       struct mbuf *m;
+       size_t resid;           /* bytes still to be sent */
+       int space, len;
+       int error, mlen;
+       int allatonce;          /* set when data arrives prepackaged in top */
+       int pru_flags;
+       /* TCP send path: data comes from uio or a prebuilt chain in top. */
+       if (uio) {
+               KKASSERT(top == NULL);
+               allatonce = 0;
+               resid = uio->uio_resid;
+       } else {
+               allatonce = 1;
+               resid = (size_t)top->m_pkthdr.len;
+#ifdef INVARIANTS
+               len = 0;        /* cross-check m_pkthdr.len against the chain */
+               for (m = top; m; m = m->m_next)
+                       len += m->m_len;
+               KKASSERT(top->m_pkthdr.len == len);
+#endif
+       }
+
+       /*
+        * WARNING!  resid is unsigned, space and len are signed.  space
+        *           can wind up negative if the sockbuf is overcommitted.
+        *
+        * Also check to make sure that MSG_EOR isn't used on TCP
+        */
+       if (flags & MSG_EOR) {
+               error = EINVAL;
+               goto out;
+       }
+
+       if (control) {
+               /* TCP doesn't do control messages (rights, creds, etc) */
+               if (control->m_len) {
+                       error = EINVAL;
+                       goto out;
+               }
+               m_freem(control);       /* empty control, just free it */
+               control = NULL;
+       }
+
+       if (td->td_lwp != NULL)
+               td->td_lwp->lwp_ru.ru_msgsnd++;
+       /* gotoerr(): set error and jump to release, which unlocks so_snd. */
+#define        gotoerr(errcode)        { error = errcode; goto release; }
+
+restart:
+       error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
+       if (error)
+               goto out;
+       /* Each pass re-checks the socket state before building more data. */
+       do {
+               if (so->so_state & SS_CANTSENDMORE)
+                       gotoerr(EPIPE);
+               if (so->so_error) {
+                       error = so->so_error;
+                       so->so_error = 0;       /* pending error is consumed here */
+                       goto release;
+               }
+               if ((so->so_state & SS_ISCONNECTED) == 0 &&
+                   (so->so_state & SS_ISCONFIRMING) == 0)
+                       gotoerr(ENOTCONN);
+               if (allatonce && resid > so->so_snd.ssb_hiwat)
+                       gotoerr(EMSGSIZE);
+
+               space = ssb_space(&so->so_snd);
+               if (flags & MSG_OOB)
+                       space += 1024;  /* OOB sends get 1024 bytes of headroom */
+               if ((space < 0 || (size_t)space < resid) && !allatonce &&
+                   space < so->so_snd.ssb_lowat) {
+                       if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
+                               gotoerr(EWOULDBLOCK);
+                       ssb_unlock(&so->so_snd);        /* unlock before waiting */
+                       error = ssb_wait(&so->so_snd);
+                       if (error)
+                               goto out;
+                       goto restart;   /* relock and re-check the state */
+               }
+               mp = &top;
+               do {
+                   int cnt = 0;        /* mbufs added to the current chain */
+
+                   if (uio == NULL) {
+                       /*
+                        * Data is prepackaged in "top".
+                        */
+                       resid = 0;
+                   } else do {
+                       if (resid > INT_MAX)
+                               resid = INT_MAX;        /* cast to int just below */
+                       m = m_getl((int)resid, MB_WAIT, MT_DATA,
+                                  top == NULL ? M_PKTHDR : 0, &mlen);
+                       if (top == NULL) {      /* first mbuf of this chain */
+                               m->m_pkthdr.len = 0;
+                               m->m_pkthdr.rcvif = NULL;
+                       }
+                       len = imin((int)szmin(mlen, resid), space);
+                       space -= len;
+                       error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
+                       resid = uio->uio_resid;
+                       m->m_len = len;
+                       *mp = m;        /* append; sets top for the first mbuf */
+                       top->m_pkthdr.len += len;
+                       if (error)
+                               goto release;   /* top is freed at out: */
+                       mp = &m->m_next;
+                       if (resid == 0)
+                               break;
+                       ++cnt;
+                   } while (space > 0 && cnt < tcp_sosnd_agglim);
+                   /* Pick the send flags for the chain now held in top. */
+                   if (flags & MSG_OOB) {
+                           pru_flags = PRUS_OOB;
+                   } else if (resid > 0 && space > 0) {
+                           /* If there is more to send, set PRUS_MORETOCOME */
+                           pru_flags = PRUS_MORETOCOME;
+                   } else {
+                           pru_flags = 0;
+                   }
+
+                   /*
+                    * XXX all the SS_CANTSENDMORE checks previously
+                    * done could be out of date.  We could have received
+                    * a reset packet in an interrupt or maybe we slept
+                    * while doing page faults in uiomove() etc. We could
+                    * probably recheck again inside the splnet() protection
+                    * here, but there are probably other places that this
+                    * also happens.  We must rethink this.
+                    */
+                   if ((pru_flags & PRUS_OOB) ||
+                       (pru_flags & PRUS_MORETOCOME) == 0) {
+                           error = so_pru_send(so, pru_flags, top,
+                               NULL, NULL, td);
+                   } else {
+                           so_pru_send_async(so, pru_flags, top,
+                               NULL, NULL, td);
+                           error = 0;  /* the async call returns no status */
+                   }
+                   /* top was passed to the send call; do not free it below. */
+                   top = NULL;
+                   mp = &top;
+                   if (error)
+                           goto release;
+               } while (resid && space > 0);
+       } while (resid);
+
+release:
+       ssb_unlock(&so->so_snd);
+out:
+       if (top)
+               m_freem(top);
+       if (control)
+               m_freem(control);
+       return (error);
+}
+
 /*
  * Implement receive operations on a socket.
  *
index 99df2b1..6ea7573 100644 (file)
@@ -191,6 +191,7 @@ struct netmsg_pru_send {
 #define PRUS_EOF               0x2
 #define PRUS_MORETOCOME                0x4
 #define PRUS_NAMALLOC          0x8
+#define PRUS_NOREPLY           0x10
 
 struct netmsg_pru_sense {
        struct netmsg_base      base;
index 4535dfc..7bb4d5f 100644 (file)
@@ -227,6 +227,9 @@ int tcp_autorcvbuf_max = 2*1024*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
     &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
 
+int tcp_sosnd_agglim = 2;      /* bounds the mbuf-chaining loop in sosendtcp() */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sosnd_agglim, CTLFLAG_RW,
+    &tcp_sosnd_agglim, 0, "TCP sosend mbuf aggregation limit");
 
 static void     tcp_dooptions(struct tcpopt *, u_char *, int, boolean_t);
 static void     tcp_pulloutofband(struct socket *,
index f23799c..36d7ab2 100644 (file)
@@ -257,13 +257,16 @@ tcp_usr_detach(netmsg_t msg)
                 TCPDEBUG1();                                   \
        } while(0)
 
-#define COMMON_END(req)                                                \
+#define COMMON_END1(req, noreply)                              \
        out: do {                                               \
                TCPDEBUG2(req);                                 \
-               lwkt_replymsg(&msg->lmsg, error);               \
+               if (!(noreply))                                 \
+                       lwkt_replymsg(&msg->lmsg, error);       \
                return;                                         \
        } while(0)
 
+#define COMMON_END(req)                COMMON_END1((req), 0)
+
 /*
  * Give the socket an address.
  */
@@ -735,6 +738,8 @@ tcp_usr_send(netmsg_t msg)
        struct tcpcb *tp;
        TCPDEBUG0;
 
+       KKASSERT(control == NULL);
+
        inp = so->so_pcb;
 
        if (inp == NULL) {
@@ -744,8 +749,6 @@ tcp_usr_send(netmsg_t msg)
                 * network interrupt in the non-critical section of sosend().
                 */
                m_freem(m);
-               if (control)
-                       m_freem(control);
                error = ECONNRESET;     /* XXX EPIPE? */
                tp = NULL;
                TCPDEBUG1();
@@ -753,16 +756,6 @@ tcp_usr_send(netmsg_t msg)
        }
        tp = intotcpcb(inp);
        TCPDEBUG1();
-       if (control) {
-               /* TCP doesn't do control messages (rights, creds, etc) */
-               if (control->m_len) {
-                       m_freem(control);
-                       m_freem(m);
-                       error = EINVAL;
-                       goto out;
-               }
-               m_freem(control);       /* empty control, just free it */
-       }
 
        /*
         * Don't let too much OOB data build up
@@ -810,8 +803,9 @@ tcp_usr_send(netmsg_t msg)
                                tp->t_flags &= ~TF_MORETOCOME;
                }
        }
-       COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
-                  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+       COMMON_END1((flags & PRUS_OOB) ? PRU_SENDOOB :
+                  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND),
+                  (flags & PRUS_NOREPLY));
 }
 
 /*
@@ -882,7 +876,7 @@ struct pr_usrreqs tcp_usrreqs = {
        .pru_sense = pru_sense_null,
        .pru_shutdown = tcp_usr_shutdown,
        .pru_sockaddr = in_setsockaddr_dispatch,
-       .pru_sosend = sosend,
+       .pru_sosend = sosendtcp,
        .pru_soreceive = soreceive
 };
 
@@ -905,7 +899,7 @@ struct pr_usrreqs tcp6_usrreqs = {
        .pru_sense = pru_sense_null,
        .pru_shutdown = tcp_usr_shutdown,
        .pru_sockaddr = in6_mapped_sockaddr_dispatch,
-       .pru_sosend = sosend,
+       .pru_sosend = sosendtcp,
        .pru_soreceive = soreceive
 };
 #endif /* INET6 */
index 6e6f491..82c2820 100644 (file)
@@ -93,8 +93,13 @@ struct m_hdr {
 #ifdef MBUF_DEBUG
        const char *mh_lastfunc;
 #endif
-       struct netmsg_packet mh_netmsg; /* hardware->proto stack msg */
+       union {
+               struct netmsg_packet mhm_pkt;   /* hardware->proto stack msg */
+               struct netmsg_pru_send mhm_snd; /* usrspace->proto stack msg */
+       } mh_msgu;
 };
+#define mh_netmsg      mh_msgu.mhm_pkt
+#define mh_sndmsg      mh_msgu.mhm_snd
 
 /* pf stuff */
 struct pkthdr_pf {
index e2f2d47..2274435 100644 (file)
@@ -95,6 +95,9 @@ int so_pru_rcvoob (struct socket *so, struct mbuf *m, int flags);
 int so_pru_send (struct socket *so, int flags, struct mbuf *m,
                struct sockaddr *addr, struct mbuf *control,
                struct thread *td);
+void so_pru_send_async (struct socket *so, int flags, struct mbuf *m,
+               struct sockaddr *addr, struct mbuf *control,
+               struct thread *td);
 int so_pru_sense (struct socket *so, struct stat *sb);
 int so_pru_shutdown (struct socket *so);
 int so_pru_sockaddr (struct socket *so, struct sockaddr **nam);
index f5ac9a7..5f9c440 100644 (file)
@@ -437,6 +437,9 @@ int sosend (struct socket *so, struct sockaddr *addr, struct uio *uio,
 int    sosendudp (struct socket *so, struct sockaddr *addr, struct uio *uio,
                    struct mbuf *top, struct mbuf *control, int flags,
                    struct thread *td);
+int    sosendtcp (struct socket *so, struct sockaddr *addr, struct uio *uio,
+                   struct mbuf *top, struct mbuf *control, int flags,
+                   struct thread *td);
 int    sosetopt (struct socket *so, struct sockopt *sopt);
 int    soshutdown (struct socket *so, int how);
 void   sotoxsocket (struct socket *so, struct xsocket *xso);