From: Sepherosa Ziehau Date: Mon, 24 Oct 2011 12:10:07 +0000 (+0800) Subject: Merge branch 'devel' X-Git-Tag: v3.0.0~833 X-Git-Url: https://gitweb.dragonflybsd.org/~tuxillo/dragonfly.git/commitdiff_plain/ead125b874628dc824f4c2b1a3b12c4520fae137?hp=9c5a96cce3105635e541a00723851979cdd33b14 Merge branch 'devel' --- diff --git a/sys/kern/uipc_msg.c b/sys/kern/uipc_msg.c index b2670ce319..a81285fe4d 100644 --- a/sys/kern/uipc_msg.c +++ b/sys/kern/uipc_msg.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -316,6 +317,23 @@ so_pru_send(struct socket *so, int flags, struct mbuf *m, return (error); } +void +so_pru_send_async(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct thread *td) +{ + struct netmsg_pru_send *msg; + + msg = &m->m_hdr.mh_sndmsg; + netmsg_init(&msg->base, so, &netisr_apanic_rport, + 0, so->so_proto->pr_usrreqs->pru_send); + msg->nm_flags = flags | PRUS_NOREPLY; + msg->nm_m = m; + msg->nm_addr = addr; + msg->nm_control = control; + msg->nm_td = td; + lwkt_sendmsg(so->so_port, &msg->base.lmsg); +} + int so_pru_sense(struct socket *so, struct stat *sb) { diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 868aac7221..936158d569 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -97,6 +97,8 @@ #include +extern int tcp_sosnd_agglim; + #ifdef INET static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); #endif /* INET */ @@ -820,6 +822,170 @@ out: return (error); } +int +sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, + struct thread *td) +{ + struct mbuf **mp; + struct mbuf *m; + size_t resid; + int space, len; + int error, mlen; + int allatonce; + int pru_flags; + + if (uio) { + KKASSERT(top == NULL); + allatonce = 0; + resid = uio->uio_resid; + } else { + allatonce = 1; + resid = (size_t)top->m_pkthdr.len; +#ifdef INVARIANTS + len = 0; + for (m = top; m; m 
= m->m_next) + len += m->m_len; + KKASSERT(top->m_pkthdr.len == len); +#endif + } + + /* + * WARNING! resid is unsigned, space and len are signed. space + * can wind up negative if the sockbuf is overcommitted. + * + * Also check to make sure that MSG_EOR isn't used on TCP + */ + if (flags & MSG_EOR) { + error = EINVAL; + goto out; + } + + if (control) { + /* TCP doesn't do control messages (rights, creds, etc) */ + if (control->m_len) { + error = EINVAL; + goto out; + } + m_freem(control); /* empty control, just free it */ + control = NULL; + } + + if (td->td_lwp != NULL) + td->td_lwp->lwp_ru.ru_msgsnd++; + +#define gotoerr(errcode) { error = errcode; goto release; } + +restart: + error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) + goto out; + + do { + if (so->so_state & SS_CANTSENDMORE) + gotoerr(EPIPE); + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + goto release; + } + if ((so->so_state & SS_ISCONNECTED) == 0 && + (so->so_state & SS_ISCONFIRMING) == 0) + gotoerr(ENOTCONN); + if (allatonce && resid > so->so_snd.ssb_hiwat) + gotoerr(EMSGSIZE); + + space = ssb_space(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + if ((space < 0 || (size_t)space < resid) && !allatonce && + space < so->so_snd.ssb_lowat) { + if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) + gotoerr(EWOULDBLOCK); + ssb_unlock(&so->so_snd); + error = ssb_wait(&so->so_snd); + if (error) + goto out; + goto restart; + } + mp = &top; + do { + int cnt = 0; + + if (uio == NULL) { + /* + * Data is prepackaged in "top". + */ + resid = 0; + } else do { + if (resid > INT_MAX) + resid = INT_MAX; + m = m_getl((int)resid, MB_WAIT, MT_DATA, + top == NULL ? 
M_PKTHDR : 0, &mlen); + if (top == NULL) { + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = NULL; + } + len = imin((int)szmin(mlen, resid), space); + space -= len; + error = uiomove(mtod(m, caddr_t), (size_t)len, uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid == 0) + break; + ++cnt; + } while (space > 0 && cnt < tcp_sosnd_agglim); + + if (flags & MSG_OOB) { + pru_flags = PRUS_OOB; + } else if (resid > 0 && space > 0) { + /* If there is more to send, set PRUS_MORETOCOME */ + pru_flags = PRUS_MORETOCOME; + } else { + pru_flags = 0; + } + + /* + * XXX all the SS_CANTSENDMORE checks previously + * done could be out of date. We could have recieved + * a reset packet in an interrupt or maybe we slept + * while doing page faults in uiomove() etc. We could + * probably recheck again inside the splnet() protection + * here, but there are probably other places that this + * also happens. We must rethink this. + */ + if ((pru_flags & PRUS_OOB) || + (pru_flags & PRUS_MORETOCOME) == 0) { + error = so_pru_send(so, pru_flags, top, + NULL, NULL, td); + } else { + so_pru_send_async(so, pru_flags, top, + NULL, NULL, td); + error = 0; + } + + top = NULL; + mp = &top; + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + ssb_unlock(&so->so_snd); +out: + if (top) + m_freem(top); + if (control) + m_freem(control); + return (error); +} + /* * Implement receive operations on a socket. 
* diff --git a/sys/net/netmsg.h b/sys/net/netmsg.h index 99df2b17c4..6ea757327b 100644 --- a/sys/net/netmsg.h +++ b/sys/net/netmsg.h @@ -191,6 +191,7 @@ struct netmsg_pru_send { #define PRUS_EOF 0x2 #define PRUS_MORETOCOME 0x4 #define PRUS_NAMALLOC 0x8 +#define PRUS_NOREPLY 0x10 struct netmsg_pru_sense { struct netmsg_base base; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 4535dfc6b7..7bb4d5f52f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -227,6 +227,9 @@ int tcp_autorcvbuf_max = 2*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW, &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer"); +int tcp_sosnd_agglim = 2; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, sosnd_agglim, CTLFLAG_RW, + &tcp_sosnd_agglim, 0, "TCP sosend mbuf aggregation limit"); static void tcp_dooptions(struct tcpopt *, u_char *, int, boolean_t); static void tcp_pulloutofband(struct socket *, diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index f23799cad6..36d7ab2b99 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -257,13 +257,16 @@ tcp_usr_detach(netmsg_t msg) TCPDEBUG1(); \ } while(0) -#define COMMON_END(req) \ +#define COMMON_END1(req, noreply) \ out: do { \ TCPDEBUG2(req); \ - lwkt_replymsg(&msg->lmsg, error); \ + if (!(noreply)) \ + lwkt_replymsg(&msg->lmsg, error); \ return; \ } while(0) +#define COMMON_END(req) COMMON_END1((req), 0) + /* * Give the socket an address. */ @@ -735,6 +738,8 @@ tcp_usr_send(netmsg_t msg) struct tcpcb *tp; TCPDEBUG0; + KKASSERT(control == NULL); + inp = so->so_pcb; if (inp == NULL) { @@ -744,8 +749,6 @@ tcp_usr_send(netmsg_t msg) * network interrupt in the non-critical section of sosend(). */ m_freem(m); - if (control) - m_freem(control); error = ECONNRESET; /* XXX EPIPE? 
*/ tp = NULL; TCPDEBUG1(); @@ -753,16 +756,6 @@ tcp_usr_send(netmsg_t msg) } tp = intotcpcb(inp); TCPDEBUG1(); - if (control) { - /* TCP doesn't do control messages (rights, creds, etc) */ - if (control->m_len) { - m_freem(control); - m_freem(m); - error = EINVAL; - goto out; - } - m_freem(control); /* empty control, just free it */ - } /* * Don't let too much OOB data build up @@ -810,8 +803,9 @@ tcp_usr_send(netmsg_t msg) tp->t_flags &= ~TF_MORETOCOME; } } - COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : - ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); + COMMON_END1((flags & PRUS_OOB) ? PRU_SENDOOB : + ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), + (flags & PRUS_NOREPLY)); } /* @@ -882,7 +876,7 @@ struct pr_usrreqs tcp_usrreqs = { .pru_sense = pru_sense_null, .pru_shutdown = tcp_usr_shutdown, .pru_sockaddr = in_setsockaddr_dispatch, - .pru_sosend = sosend, + .pru_sosend = sosendtcp, .pru_soreceive = soreceive }; @@ -905,7 +899,7 @@ struct pr_usrreqs tcp6_usrreqs = { .pru_sense = pru_sense_null, .pru_shutdown = tcp_usr_shutdown, .pru_sockaddr = in6_mapped_sockaddr_dispatch, - .pru_sosend = sosend, + .pru_sosend = sosendtcp, .pru_soreceive = soreceive }; #endif /* INET6 */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 6e6f491cdb..82c282072c 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -93,8 +93,13 @@ struct m_hdr { #ifdef MBUF_DEBUG const char *mh_lastfunc; #endif - struct netmsg_packet mh_netmsg; /* hardware->proto stack msg */ + union { + struct netmsg_packet mhm_pkt; /* hardware->proto stack msg */ + struct netmsg_pru_send mhm_snd; /* usrspace->proto stack msg */ + } mh_msgu; }; +#define mh_netmsg mh_msgu.mhm_pkt +#define mh_sndmsg mh_msgu.mhm_snd /* pf stuff */ struct pkthdr_pf { diff --git a/sys/sys/socketops.h b/sys/sys/socketops.h index e2f2d474fc..2274435127 100644 --- a/sys/sys/socketops.h +++ b/sys/sys/socketops.h @@ -95,6 +95,9 @@ int so_pru_rcvoob (struct socket *so, struct mbuf *m, int flags); int so_pru_send (struct socket *so, int 
flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td); +void so_pru_send_async (struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, + struct thread *td); int so_pru_sense (struct socket *so, struct stat *sb); int so_pru_shutdown (struct socket *so); int so_pru_sockaddr (struct socket *so, struct sockaddr **nam); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index f5ac9a77b3..5f9c440b97 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -437,6 +437,9 @@ int sosend (struct socket *so, struct sockaddr *addr, struct uio *uio, int sosendudp (struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); +int sosendtcp (struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, + struct thread *td); int sosetopt (struct socket *so, struct sockopt *sopt); int soshutdown (struct socket *so, int how); void sotoxsocket (struct socket *so, struct xsocket *xso);