From 5217bcbc2e93291d234b66b766c4801140f9d4f1 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 17 Sep 2010 01:45:04 -0700 Subject: [PATCH] network - Fix race in accept() - try #2 * The last fix wasn't good enough. Really try to fix it this time. Use a pool token and validate so_head after acquiring it to deal with races, interlock against 0-ref races (sockets can be on the so_comp/so_incomp queues with 0 references), and use it for the accept predicate. --- sys/kern/uipc_socket.c | 32 +++++++++++++++++++++++-------- sys/kern/uipc_socket2.c | 26 ++++++++++++++++++------- sys/kern/uipc_syscalls.c | 10 +++++++--- sys/netgraph/ksocket/ng_ksocket.c | 6 +++--- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7cb76244a6..41d9655833 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -319,23 +319,39 @@ solisten(struct socket *so, int backlog, struct thread *td) void sofree(struct socket *so) { - struct socket *head = so->so_head; + struct socket *head; + + /* + * This is a bit hackish at the moment. We need to interlock + * any accept queue we are on before we potentially lose the + * last reference to avoid races against a re-reference from + * someone operating on the queue. + */ + while ((head = so->so_head) != NULL) { + lwkt_getpooltoken(head); + if (so->so_head == head) + break; + lwkt_relpooltoken(head); + } /* * Arbitrage the last free. */ KKASSERT(so->so_refs > 0); - if (atomic_fetchadd_int(&so->so_refs, -1) != 1) + if (atomic_fetchadd_int(&so->so_refs, -1) != 1) { + if (head) + lwkt_relpooltoken(head); return; + } KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF)); KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); /* - * We're done, clean up + * We're done, remove ourselves from the accept queue we are + * on, if we are on one. */ if (head != NULL) { - lwkt_gettoken(&head->so_rcv.ssb_token); if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; @@ -346,14 +362,14 @@ sofree(struct socket *so) * accept(2) may hang after select(2) indicated * that the listening socket was ready. */ - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); return; } else { panic("sofree: not queued"); } soclrstate(so, SS_INCOMP); so->so_head = NULL; - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); } ssb_release(&so->so_snd, so); sorflush(so); @@ -400,7 +416,7 @@ drop: error = error2; } discard: - lwkt_gettoken(&so->so_rcv.ssb_token); + lwkt_getpooltoken(so); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; @@ -419,7 +435,7 @@ discard: soaborta(sp); } } - lwkt_reltoken(&so->so_rcv.ssb_token); + lwkt_relpooltoken(so); if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); sosetstate(so, SS_NOFDREF); /* take ref */ diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 6a1fabedd6..757e0e08ef 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -222,7 +222,14 @@ soisconnecting(struct socket *so) void soisconnected(struct socket *so) { - struct socket *head = so->so_head; + struct socket *head; + + while ((head = so->so_head) != NULL) { + lwkt_getpooltoken(head); + if (so->so_head == head) + break; + lwkt_relpooltoken(head); + } soclrstate(so, SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING); sosetstate(so, SS_ISCONNECTED); @@ -233,20 +240,19 @@ soisconnected(struct socket *so) atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); so->so_options &= ~SO_ACCEPTFILTER; so->so_upcall(so, so->so_upcallarg, 0); + lwkt_relpooltoken(head); return; } /* * Listen socket are not per-cpu. */ - lwkt_gettoken(&head->so_rcv.ssb_token); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); head->so_qlen++; sosetstate(so, SS_COMP); soclrstate(so, SS_INCOMP); - lwkt_reltoken(&head->so_rcv.ssb_token); /* * XXX head may be on a different protocol thread. @@ -259,6 +265,8 @@ soisconnected(struct socket *so) sorwakeup(so); sowwakeup(so); } + if (head) + lwkt_relpooltoken(head); } void @@ -381,7 +389,7 @@ sonewconn(struct socket *head, int connstatus) (SSB_AUTOSIZE | SSB_AUTOLOWAT); so->so_snd.ssb_flags |= head->so_snd.ssb_flags & (SSB_AUTOSIZE | SSB_AUTOLOWAT); - lwkt_gettoken(&head->so_rcv.ssb_token); + lwkt_getpooltoken(head); if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); sosetstate(so, SS_COMP); @@ -399,7 +407,7 @@ sonewconn(struct socket *head, int connstatus) sosetstate(so, SS_INCOMP); head->so_incqlen++; } - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); if (connstatus) { /* * XXX head may be on a different protocol thread. @@ -494,12 +502,16 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb) * This is a bit of a hack. Multiple threads can wind up scanning * ki_mlist concurrently due to the fact that this function can be * called on a foreign socket, so we can't afford to block here. + * + * We need the pool token for (so) (likely the listne socket if + * SSB_MEVENT is set) because the predicate function may have + * to access the accept queue. */ if (ssb->ssb_flags & SSB_MEVENT) { struct netmsg_so_notify *msg, *nmsg; lwkt_gettoken(&kq_token); - lwkt_gettoken_hard(&ssb->ssb_token); + lwkt_getpooltoken(so); TAILQ_FOREACH_MUTABLE(msg, &kqinfo->ki_mlist, nm_list, nmsg) { if (msg->nm_predicate(msg)) { TAILQ_REMOVE(&kqinfo->ki_mlist, msg, nm_list); @@ -509,7 +521,7 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb) } if (TAILQ_EMPTY(&ssb->ssb_kq.ki_mlist)) atomic_clear_int(&ssb->ssb_flags, SSB_MEVENT); - lwkt_reltoken_hard(&ssb->ssb_token); + lwkt_relpooltoken(so); lwkt_reltoken(&kq_token); } } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 6686d02ea4..c30ea34e55 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -214,6 +214,10 @@ sys_listen(struct listen_args *uap) /* * Returns the accepted socket as well. + * + * NOTE! The sockets sitting on so_comp/so_incomp might have 0 refs, the + * pool token is absolutely required to avoid a sofree() race, + * as well as to avoid tailq handling races. */ static boolean_t soaccept_predicate(struct netmsg_so_notify *msg) @@ -225,7 +229,7 @@ soaccept_predicate(struct netmsg_so_notify *msg) msg->base.lmsg.ms_error = head->so_error; return (TRUE); } - lwkt_gettoken(&head->so_rcv.ssb_token); + lwkt_getpooltoken(head); if (!TAILQ_EMPTY(&head->so_comp)) { /* Abuse nm_so field as copy in/copy out parameter. XXX JH */ so = TAILQ_FIRST(&head->so_comp); @@ -235,13 +239,13 @@ soaccept_predicate(struct netmsg_so_notify *msg) so->so_head = NULL; soreference(so); - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); msg->base.lmsg.ms_error = 0; msg->base.nm_so = so; return (TRUE); } - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); if (head->so_state & SS_CANTRCVMORE) { msg->base.lmsg.ms_error = ECONNABORTED; return (TRUE); diff --git a/sys/netgraph/ksocket/ng_ksocket.c b/sys/netgraph/ksocket/ng_ksocket.c index 51e22218a4..210e0e4899 100644 --- a/sys/netgraph/ksocket/ng_ksocket.c +++ b/sys/netgraph/ksocket/ng_ksocket.c @@ -1166,10 +1166,10 @@ ng_ksocket_finish_accept(priv_p priv, struct ng_mesg **rptr) priv_p priv2; int len; - lwkt_gettoken(&head->so_rcv.ssb_token); + lwkt_getpooltoken(head); so = TAILQ_FIRST(&head->so_comp); if (so == NULL) { /* Should never happen */ - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); return; } TAILQ_REMOVE(&head->so_comp, so, so_list); @@ -1178,7 +1178,7 @@ ng_ksocket_finish_accept(priv_p priv, struct ng_mesg **rptr) so->so_head = NULL; soreference(so); - lwkt_reltoken(&head->so_rcv.ssb_token); + lwkt_relpooltoken(head); /* XXX KNOTE(&head->so_rcv.ssb_sel.si_note, 0); */ -- 2.41.0