From 14343ad3b815bafa1bcec3656de2d614fcc75bec Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 6 Sep 2010 13:34:01 -0700 Subject: [PATCH] kernel - Fix numerous MP issues with sockbuf's and ssb_flags part 1/2 * Use atomic ops for ssb_flags handling * Use atomic_cmpset_int() to interlock SSB_LOCK with SSB_WANT, and SSB_WAIT with SSB_WAKEUP. Note in particular that WAIT/WAKEUP assumes the client side of the socket is single threaded via an appropriate lock. This needs more work. --- sys/kern/sys_socket.c | 8 +-- sys/kern/uipc_msg.c | 2 +- sys/kern/uipc_socket.c | 43 +++++++----- sys/kern/uipc_socket2.c | 110 ++++++++++++++++++++++++------ sys/kern/uipc_usrreq.c | 4 +- sys/kern/vfs_aio.c | 14 ++-- sys/net/accf_data/accf_data.c | 2 +- sys/net/accf_http/accf_http.c | 10 +-- sys/netgraph/ksocket/ng_ksocket.c | 12 ++-- sys/netinet/tcp_input.c | 4 +- sys/netinet/tcp_output.c | 4 +- sys/netinet/tcp_usrreq.c | 4 +- sys/netproto/smb/smb_trantcp.c | 10 +-- sys/sys/socketvar.h | 80 +++++++++++++--------- sys/sys/socketvar2.h | 40 ++++++++--- sys/vfs/nfs/nfs_socket.c | 4 +- sys/vfs/nfs/nfs_syscalls.c | 8 +-- sys/vfs/portal/portal_vnops.c | 4 +- 18 files changed, 238 insertions(+), 125 deletions(-) diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 6990da6509..0b0eeb03a5 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -137,12 +137,12 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, case FIOASYNC: if (*(int *)data) { so->so_state |= SS_ASYNC; - so->so_rcv.ssb_flags |= SSB_ASYNC; - so->so_snd.ssb_flags |= SSB_ASYNC; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_ASYNC); + atomic_set_int(&so->so_snd.ssb_flags, SSB_ASYNC); } else { so->so_state &= ~SS_ASYNC; - so->so_rcv.ssb_flags &= ~SSB_ASYNC; - so->so_snd.ssb_flags &= ~SSB_ASYNC; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_ASYNC); + atomic_clear_int(&so->so_snd.ssb_flags, SSB_ASYNC); } error = 0; break; diff --git a/sys/kern/uipc_msg.c b/sys/kern/uipc_msg.c index f1a5b05c74..02a895fe13 100644 --- a/sys/kern/uipc_msg.c +++ b/sys/kern/uipc_msg.c @@ -623,8 +623,8 @@ netmsg_so_notify(netmsg_t netmsg) } else { lwkt_gettoken(&kq_token); TAILQ_INSERT_TAIL(&ssb->ssb_kq.ki_mlist, msg, nm_list); + atomic_set_int(&ssb->ssb_flags, SSB_MEVENT); lwkt_reltoken(&kq_token); - ssb->ssb_flags |= SSB_MEVENT; } } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index e170714fe2..12bf054ee6 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1178,19 +1178,23 @@ sorflush(struct socket *so) struct protosw *pr = so->so_proto; struct signalsockbuf asb; - ssb->ssb_flags |= SSB_NOINTR; - (void) ssb_lock(ssb, M_WAITOK); + atomic_set_int(&ssb->ssb_flags, SSB_NOINTR); - crit_enter(); + ssb_lock(ssb, M_WAITOK); socantrcvmore(so); - ssb_unlock(ssb); asb = *ssb; - bzero((caddr_t)ssb, sizeof (*ssb)); - if (asb.ssb_flags & SSB_KNOTE) { - ssb->ssb_kq.ki_note = asb.ssb_kq.ki_note; - ssb->ssb_flags |= SSB_KNOTE; - } - crit_exit(); + + /* + * Can't just blow up the ssb structure here + */ + ssb->ssb_timeo = 0; + ssb->ssb_unused01 = 0; + ssb->ssb_lowat = 0; + ssb->ssb_hiwat = 0; + ssb->ssb_mbmax = 0; + atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK); + + ssb_unlock(ssb); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.ssb_mb); @@ -1317,6 +1321,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct linger l; struct timeval tv; u_long val; + struct signalsockbuf *sotmp; error = 0; sopt->sopt_dir = SOPT_SET; @@ -1393,8 +1398,10 @@ sosetopt(struct socket *so, struct sockopt *sopt) error = ENOBUFS; goto bad; } - (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : - &so->so_rcv)->ssb_flags &= ~SSB_AUTOSIZE; + sotmp = (sopt->sopt_name == SO_SNDBUF) ? + &so->so_snd : &so->so_rcv; + atomic_clear_int(&sotmp->ssb_flags, + SSB_AUTOSIZE); break; /* @@ -1405,13 +1412,15 @@ sosetopt(struct socket *so, struct sockopt *sopt) so->so_snd.ssb_lowat = (optval > so->so_snd.ssb_hiwat) ? so->so_snd.ssb_hiwat : optval; - so->so_snd.ssb_flags &= ~SSB_AUTOLOWAT; + atomic_clear_int(&so->so_snd.ssb_flags, + SSB_AUTOLOWAT); break; case SO_RCVLOWAT: so->so_rcv.ssb_lowat = (optval > so->so_rcv.ssb_hiwat) ? so->so_rcv.ssb_hiwat : optval; - so->so_rcv.ssb_flags &= ~SSB_AUTOLOWAT; + atomic_clear_int(&so->so_rcv.ssb_flags, + SSB_AUTOLOWAT); break; } break; @@ -1728,7 +1737,7 @@ sokqfilter(struct file *fp, struct knote *kn) } knote_insert(&ssb->ssb_kq.ki_note, kn); - ssb->ssb_flags |= SSB_KNOTE; + atomic_set_int(&ssb->ssb_flags, SSB_KNOTE); return (0); } @@ -1739,7 +1748,7 @@ filt_sordetach(struct knote *kn) knote_remove(&so->so_rcv.ssb_kq.ki_note, kn); if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note)) - so->so_rcv.ssb_flags &= ~SSB_KNOTE; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE); } /*ARGSUSED*/ @@ -1780,7 +1789,7 @@ filt_sowdetach(struct knote *kn) knote_remove(&so->so_snd.ssb_kq.ki_note, kn); if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note)) - so->so_snd.ssb_flags &= ~SSB_KNOTE; + atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE); } /*ARGSUSED*/ diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 05eb2d0798..4ad52b7ce1 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -76,16 +76,51 @@ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ /* * Wait for data to arrive at/drain from a socket buffer. + * + * NOTE: Caller must generally hold the ssb_lock (client side lock) since + * WAIT/WAKEUP only works for one client at a time. + * + * NOTE: Caller always retries whatever operation it was waiting on. */ int ssb_wait(struct signalsockbuf *ssb) { + uint32_t flags; + int pflags; + int error; + + pflags = (ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH; + + for (;;) { + flags = ssb->ssb_flags; + cpu_ccfence(); + + /* + * WAKEUP and WAIT interlock eachother. We can catch the + * race by checking to see if WAKEUP has already been set, + * and only setting WAIT if WAKEUP is clear. + */ + if (flags & SSB_WAKEUP) { + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + flags & ~SSB_WAKEUP)) { + error = 0; + break; + } + continue; + } - ssb->ssb_flags |= SSB_WAIT; - return (tsleep((caddr_t)&ssb->ssb_cc, - ((ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH), - "sbwait", - ssb->ssb_timeo)); + /* + * Only set WAIT if WAKEUP is clear. + */ + tsleep_interlock(&ssb->ssb_cc, pflags); + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + flags | SSB_WAIT)) { + error = tsleep(&ssb->ssb_cc, pflags | PINTERLOCKED, + "sbwait", ssb->ssb_timeo); + break; + } + } + return (error); } /* @@ -95,18 +130,34 @@ ssb_wait(struct signalsockbuf *ssb) int _ssb_lock(struct signalsockbuf *ssb) { + uint32_t flags; + int pflags; int error; - while (ssb->ssb_flags & SSB_LOCK) { - ssb->ssb_flags |= SSB_WANT; - error = tsleep((caddr_t)&ssb->ssb_flags, - ((ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH), - "sblock", 0); - if (error) - return (error); + pflags = (ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH; + + for (;;) { + flags = ssb->ssb_flags; + cpu_ccfence(); + if (flags & SSB_LOCK) { + tsleep_interlock(&ssb->ssb_flags, pflags); + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + flags | SSB_WANT)) { + error = tsleep(&ssb->ssb_flags, + pflags | PINTERLOCKED, + "sblock", 0); + if (error) + break; + } + } else { + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + flags | SSB_LOCK)) { + error = 0; + break; + } + } } - ssb->ssb_flags |= SSB_LOCK; - return (0); + return (error); } /* @@ -176,7 +227,7 @@ soisconnected(struct socket *so) if ((so->so_options & SO_ACCEPTFILTER) != 0) { so->so_upcall = head->so_accf->so_accept_filter->accf_callback; so->so_upcallarg = head->so_accf->so_accept_filter_arg; - so->so_rcv.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); so->so_options &= ~SO_ACCEPTFILTER; so->so_upcall(so, so->so_upcallarg, 0); return; @@ -350,17 +401,36 @@ void sowakeup(struct socket *so, struct signalsockbuf *ssb) { struct kqinfo *kqinfo = &ssb->ssb_kq; + uint32_t flags; + + /* + * Check conditions, set the WAKEUP flag, and clear and signal if + * the WAIT flag is found to be set. This interlocks against the + * client side. + */ + for (;;) { + flags = ssb->ssb_flags; + cpu_ccfence(); - if (ssb->ssb_flags & SSB_WAIT) { if ((ssb == &so->so_snd && ssb_space(ssb) >= ssb->ssb_lowat) || (ssb == &so->so_rcv && ssb->ssb_cc >= ssb->ssb_lowat) || (ssb == &so->so_snd && (so->so_state & SS_CANTSENDMORE)) || (ssb == &so->so_rcv && (so->so_state & SS_CANTRCVMORE)) ) { - ssb->ssb_flags &= ~SSB_WAIT; - wakeup((caddr_t)&ssb->ssb_cc); + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + (flags | SSB_WAKEUP) & ~SSB_WAIT)) { + if (flags & SSB_WAIT) + wakeup(&ssb->ssb_cc); + break; + } + } else { + break; } } + + /* + * Misc other events + */ if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(so->so_sigio, SIGIO, 0); if (ssb->ssb_flags & SSB_UPCALL) @@ -379,7 +449,7 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb) } } if (TAILQ_EMPTY(&ssb->ssb_kq.ki_mlist)) - ssb->ssb_flags &= ~SSB_MEVENT; + atomic_clear_int(&ssb->ssb_flags, SSB_MEVENT); } } @@ -418,7 +488,7 @@ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc, struct rlimit *rl) { if (so->so_snd.ssb_lowat == 0) - so->so_snd.ssb_flags |= SSB_AUTOLOWAT; + atomic_set_int(&so->so_snd.ssb_flags, SSB_AUTOLOWAT); if (ssb_reserve(&so->so_snd, sndcc, so, rl) == 0) goto bad; if (ssb_reserve(&so->so_rcv, rcvcc, so, rl) == 0) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index c0712dde72..5ec5ea55a3 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -256,7 +256,7 @@ uipc_rcvd(struct socket *so, int flags) if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat && so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax ) { - so2->so_snd.ssb_flags &= ~SSB_STOP; + atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP); sowwakeup(so2); } break; @@ -376,7 +376,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat || so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax ) { - so->so_snd.ssb_flags |= SSB_STOP; + atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP); } sorwakeup(so2); break; diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 3e61e85ee6..d5a66fdd16 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -436,8 +436,10 @@ aio_proc_rundown(struct proc *p) so = (struct socket *)fp->f_data; TAILQ_REMOVE(&so->so_aiojobq, aiocbe, list); if (TAILQ_EMPTY(&so->so_aiojobq)) { - so->so_snd.ssb_flags &= ~SSB_AIO; - so->so_rcv.ssb_flags &= ~SSB_AIO; + atomic_clear_int(&so->so_snd.ssb_flags, + SSB_AIO); + atomic_clear_int(&so->so_rcv.ssb_flags, + SSB_AIO); } } TAILQ_REMOVE(&ki->kaio_sockqueue, aiocbe, plist); @@ -1048,10 +1050,10 @@ aio_swake(struct socket *so, struct signalsockbuf *ssb) if (ssb == &so->so_snd) { opcode = LIO_WRITE; - so->so_snd.ssb_flags &= ~SSB_AIO; + atomic_clear_int(&so->so_snd.ssb_flags, SSB_AIO); } else { opcode = LIO_READ; - so->so_rcv.ssb_flags &= ~SSB_AIO; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AIO); } for (cb = TAILQ_FIRST(&so->so_aiojobq); cb; cb = cbn) { @@ -1255,9 +1257,9 @@ no_kqueue: TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list); TAILQ_INSERT_TAIL(&ki->kaio_sockqueue, aiocbe, plist); if (opcode == LIO_READ) - so->so_rcv.ssb_flags |= SSB_AIO; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_AIO); else - so->so_snd.ssb_flags |= SSB_AIO; + atomic_set_int(&so->so_snd.ssb_flags, SSB_AIO); aiocbe->jobstate = JOBST_JOBQGLOBAL; /* XXX */ ki->kaio_queue_count++; num_queue_count++; diff --git a/sys/net/accf_data/accf_data.c b/sys/net/accf_data/accf_data.c index 8bc978c4ae..50da84da46 100644 --- a/sys/net/accf_data/accf_data.c +++ b/sys/net/accf_data/accf_data.c @@ -76,7 +76,7 @@ sohasdata(struct socket *so, void *arg, int waitflag) } so->so_upcall = NULL; - so->so_rcv.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL); soisconnected(so); return; } diff --git a/sys/net/accf_http/accf_http.c b/sys/net/accf_http/accf_http.c index 1b750c0e06..2592028aeb 100644 --- a/sys/net/accf_http/accf_http.c +++ b/sys/net/accf_http/accf_http.c @@ -217,7 +217,7 @@ sohashttpget(struct socket *so, void *arg, int waitflag) fallout: DPRINT("fallout"); so->so_upcall = NULL; - so->so_rcv.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL); soisconnected(so); return; } @@ -283,13 +283,13 @@ readmore: * we don't understand or a newline, so try again */ so->so_upcall = soparsehttpvers; - so->so_rcv.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); return; fallout: DPRINT("fallout"); so->so_upcall = NULL; - so->so_rcv.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL); soisconnected(so); return; } @@ -354,12 +354,12 @@ soishttpconnected(struct socket *so, void *arg, int waitflag) readmore: so->so_upcall = soishttpconnected; - so->so_rcv.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); return; gotit: so->so_upcall = NULL; - so->so_rcv.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL); soisconnected(so); return; } diff --git a/sys/netgraph/ksocket/ng_ksocket.c b/sys/netgraph/ksocket/ng_ksocket.c index 334023cc85..3cc3c737f7 100644 --- a/sys/netgraph/ksocket/ng_ksocket.c +++ b/sys/netgraph/ksocket/ng_ksocket.c @@ -612,8 +612,8 @@ ng_ksocket_newhook(node_p node, hook_p hook, const char *name0) /* Add our hook for incoming data and other events */ priv->so->so_upcallarg = (caddr_t)node; priv->so->so_upcall = ng_ksocket_incoming; - priv->so->so_rcv.ssb_flags |= SSB_UPCALL; - priv->so->so_snd.ssb_flags |= SSB_UPCALL; + atomic_set_int(&priv->so->so_rcv.ssb_flags, SSB_UPCALL); + atomic_set_int(&priv->so->so_snd.ssb_flags, SSB_UPCALL); } /* OK */ @@ -936,8 +936,8 @@ ng_ksocket_rmnode(node_p node) /* Close our socket (if any) */ if (priv->so != NULL) { priv->so->so_upcall = NULL; - priv->so->so_rcv.ssb_flags &= ~SSB_UPCALL; - priv->so->so_snd.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&priv->so->so_rcv.ssb_flags, SSB_UPCALL); + atomic_clear_int(&priv->so->so_snd.ssb_flags, SSB_UPCALL); soclose(priv->so, FNONBLOCK); priv->so = NULL; } @@ -1206,8 +1206,8 @@ ng_ksocket_finish_accept(priv_p priv, struct ng_mesg **rptr) so->so_upcallarg = (caddr_t)node2; so->so_upcall = ng_ksocket_incoming; - so->so_rcv.ssb_flags |= SSB_UPCALL; - so->so_snd.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); + atomic_set_int(&so->so_snd.ssb_flags, SSB_UPCALL); /* Fill in the response data and send it or return it to the caller */ resp_data = (struct ng_ksocket_accept *)resp->data; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index c978d86930..ab9f352ecd 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1345,11 +1345,11 @@ after_listen: tp->t_flags |= TF_RXRESIZED; if (!ssb_reserve(&so->so_rcv, newsize, so, NULL)) { - so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE); } if (newsize >= (TCP_MAXWIN << tp->rcv_scale)) { - so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE); } } m_adj(m, drop_hdrlen); /* delayed header drop */ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 6268cba8d0..f14f7bb2da 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -373,9 +373,9 @@ again: tcp_autosndbuf_inc, tcp_autosndbuf_max); if (!ssb_reserve(&so->so_snd, newsize, so, NULL)) - so->so_snd.ssb_flags &= ~SSB_AUTOSIZE; + atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); if (newsize >= (TCP_MAXWIN << tp->snd_scale)) - so->so_snd.ssb_flags &= ~SSB_AUTOSIZE; + atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); } } diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index b3a456fdb7..2493b258ec 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1384,8 +1384,8 @@ tcp_attach(struct socket *so, struct pru_attach_info *ai) if (error) return (error); } - so->so_rcv.ssb_flags |= SSB_AUTOSIZE; - so->so_snd.ssb_flags |= SSB_AUTOSIZE; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE); + atomic_set_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); cpu = mycpu->gd_cpuid; error = in_pcballoc(so, &tcbinfo[cpu]); if (error) diff --git a/sys/netproto/smb/smb_trantcp.c b/sys/netproto/smb/smb_trantcp.c index aa5725410c..1f8c551a61 100644 --- a/sys/netproto/smb/smb_trantcp.c +++ b/sys/netproto/smb/smb_trantcp.c @@ -153,7 +153,7 @@ nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct thread *td) nbp->nbp_tso = so; so->so_upcallarg = (caddr_t)nbp; so->so_upcall = nb_upcall; - so->so_rcv.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); so->so_rcv.ssb_timeo = (5 * hz); so->so_snd.ssb_timeo = (5 * hz); error = soreserve(so, nbp->nbp_sndbuf, nbp->nbp_rcvbuf, @@ -162,16 +162,16 @@ nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct thread *td) goto bad; nb_setsockopt_int(so, SOL_SOCKET, SO_KEEPALIVE, 1); nb_setsockopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1); - so->so_rcv.ssb_flags &= ~SSB_NOINTR; - so->so_snd.ssb_flags &= ~SSB_NOINTR; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR); + atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR); error = soconnect(so, (struct sockaddr*)to, td); /* * If signals are allowed nbssn_recv() can wind up in a hard loop * on EWOULDBLOCK. */ - so->so_rcv.ssb_flags |= SSB_NOINTR; - so->so_snd.ssb_flags |= SSB_NOINTR; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR); + atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR); if (error) goto bad; crit_enter(); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index cc999e66e1..1c996786d0 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -58,12 +58,16 @@ struct accept_filter; /* * Signaling socket buffers contain additional elements for locking * and signaling conditions. These are used primarily by sockets. + * + * WARNING: See partial clearing of fields in kern/uipc_socket.c + * sorflush() and sowflush(). */ struct signalsockbuf { struct sockbuf sb; struct kqinfo ssb_kq; /* process selecting read/write */ - short ssb_flags; /* flags, see below */ + uint32_t ssb_flags; /* flags, see below (use atomic ops) */ short ssb_timeo; /* timeout for read/write */ + short ssb_unused01; long ssb_lowat; /* low water mark */ u_long ssb_hiwat; /* high water mark / max actual char count */ u_long ssb_mbmax; /* max chars of mbufs to use */ @@ -73,18 +77,25 @@ struct signalsockbuf { #define ssb_mb sb.sb_mb /* commonly used fields */ #define ssb_mbcnt sb.sb_mbcnt /* commonly used fields */ -#define SSB_LOCK 0x01 /* lock on data queue */ -#define SSB_WANT 0x02 /* someone is waiting to lock */ -#define SSB_WAIT 0x04 /* someone is waiting for data/space */ -#define SSB_ASYNC 0x10 /* ASYNC I/O, need signals */ -#define SSB_UPCALL 0x20 /* someone wants an upcall */ -#define SSB_NOINTR 0x40 /* operations not interruptible */ -#define SSB_AIO 0x80 /* AIO operations queued */ -#define SSB_KNOTE 0x100 /* kernel note attached */ -#define SSB_MEVENT 0x200 /* need message event notification */ -#define SSB_STOP 0x400 /* backpressure indicator */ -#define SSB_AUTOSIZE 0x800 /* automatically size socket buffer */ +#define SSB_LOCK 0x0001 /* lock on data queue */ +#define SSB_WANT 0x0002 /* someone is waiting to lock */ +#define SSB_WAIT 0x0004 /* someone is waiting for data/space */ +#define SSB_ASYNC 0x0010 /* ASYNC I/O, need signals */ +#define SSB_UPCALL 0x0020 /* someone wants an upcall */ +#define SSB_NOINTR 0x0040 /* operations not interruptible */ +#define SSB_AIO 0x0080 /* AIO operations queued */ +#define SSB_KNOTE 0x0100 /* kernel note attached */ +#define SSB_MEVENT 0x0200 /* need message event notification */ +#define SSB_STOP 0x0400 /* backpressure indicator */ +#define SSB_AUTOSIZE 0x0800 /* automatically size socket buffer */ #define SSB_AUTOLOWAT 0x1000 /* automatically scale lowat */ +#define SSB_WAKEUP 0x2000 /* wakeup event race */ + +#define SSB_CLEAR_MASK (SSB_ASYNC | SSB_UPCALL | SSB_STOP | \ + SSB_AUTOSIZE | SSB_AUTOLOWAT) + +#define SSB_NOTIFY_MASK (SSB_WAIT | SSB_ASYNC | SSB_UPCALL | \ + SSB_AIO | SSB_KNOTE | SSB_MEVENT) /* * Per-socket kernel structure. Contains universal send and receive queues, @@ -211,11 +222,16 @@ struct xsocket { /* * Do we need to notify the other side when I/O is possible? + * + * NOTE: Interlock for ssb_wait/wakeup. The protocol side will set + * SSB_WAKEUP asynchronously and this can race, so if it isn't + * set we have to go through the full-on notification check. + * If it is set but no waiting ever takes place it simply + * remains set. */ -#define ssb_notify(ssb) \ - (((ssb)->ssb_flags & \ - (SSB_WAIT | SSB_ASYNC | SSB_UPCALL | \ - SSB_AIO | SSB_KNOTE | SSB_MEVENT))) +#define ssb_notify(ssb) \ + (((ssb)->ssb_flags & SSB_NOTIFY_MASK) || \ + ((ssb)->ssb_flags & SSB_WAKEUP) == 0) /* do we have to send all at once on a socket? */ @@ -262,29 +278,27 @@ ssb_space(struct signalsockbuf *ssb) ((ssb_space(ssb) <= 0) ? 0 : sbappendcontrol(&(ssb)->sb, m, control)) #define ssb_insert_knote(ssb, kn) { \ - lwkt_gettoken(&kq_token); \ - SLIST_INSERT_HEAD(&(ssb)->ssb_kq.ki_note, kn, kn_next); \ - lwkt_reltoken(&kq_token); \ - (ssb)->ssb_flags |= SSB_KNOTE; \ + knote_insert(&(ssb)->ssb_kq.ki_note, kn); \ + atomic_set_int(&(ssb)->ssb_flags, SSB_KNOTE); \ } #define ssb_remove_knote(ssb, kn) { \ - lwkt_gettoken(&kq_token); \ - SLIST_REMOVE(&(ssb)->ssb_kq.ki_note, kn, knote, kn_next); \ + knote_remove(&(ssb)->ssb_kq.ki_note, kn); \ if (SLIST_EMPTY(&(ssb)->ssb_kq.ki_note)) \ - (ssb)->ssb_flags &= ~SSB_KNOTE; \ - lwkt_reltoken(&kq_token); \ + atomic_clear_int(&(ssb)->ssb_flags, SSB_KNOTE); \ } -#define sorwakeup(so) do { \ - if (ssb_notify(&(so)->so_rcv)) \ - sowakeup((so), &(so)->so_rcv); \ - } while (0) - -#define sowwakeup(so) do { \ - if (ssb_notify(&(so)->so_snd)) \ - sowakeup((so), &(so)->so_snd); \ - } while (0) +#define sorwakeup(so) \ + do { \ + if (ssb_notify(&(so)->so_rcv)) \ + sowakeup((so), &(so)->so_rcv); \ + } while (0) + +#define sowwakeup(so) \ + do { \ + if (ssb_notify(&(so)->so_snd)) \ + sowakeup((so), &(so)->so_snd); \ + } while (0) #ifdef _KERNEL diff --git a/sys/sys/socketvar2.h b/sys/sys/socketvar2.h index 51c5d1ac77..899a7b2c4e 100644 --- a/sys/sys/socketvar2.h +++ b/sys/sys/socketvar2.h @@ -47,6 +47,9 @@ #ifndef _SYS_MALLOC_H_ #include #endif +#ifndef _MACHINE_ATOMIC_H_ +#include +#endif /* * Acquire a lock on a signalsockbuf, sleep if the lock is already held. @@ -57,26 +60,41 @@ static __inline int ssb_lock(struct signalsockbuf *ssb, int wf) { - if (ssb->ssb_flags & SSB_LOCK) { - if (wf == M_WAITOK) - return _ssb_lock(ssb); - return EWOULDBLOCK; - } else { - ssb->ssb_flags |= SSB_LOCK; - return 0; + uint32_t flags; + + for (;;) { + flags = ssb->ssb_flags; + cpu_ccfence(); + if (flags & SSB_LOCK) { + if (wf == M_WAITOK) + return _ssb_lock(ssb); + return EWOULDBLOCK; + } + if (atomic_cmpset_int(&ssb->ssb_flags, flags, flags | SSB_LOCK)) + return(0); } } /* * Release a previously acquired lock on a signalsockbuf. + * + * Interlocked wakeup if SSB_WANT was also set. */ static __inline void ssb_unlock(struct signalsockbuf *ssb) { - ssb->ssb_flags &= ~SSB_LOCK; - if (ssb->ssb_flags & SSB_WANT) { - ssb->ssb_flags &= ~SSB_WANT; - wakeup(&ssb->ssb_flags); + uint32_t flags; + + KKASSERT(ssb->ssb_flags & SSB_LOCK); + for (;;) { + flags = ssb->ssb_flags; + cpu_ccfence(); + if (atomic_cmpset_int(&ssb->ssb_flags, flags, + flags & ~(SSB_LOCK | SSB_WANT))) { + if (flags & SSB_WANT) + wakeup(&ssb->ssb_flags); + break; + } } } diff --git a/sys/vfs/nfs/nfs_socket.c b/sys/vfs/nfs/nfs_socket.c index c241c3cead..f2860976a3 100644 --- a/sys/vfs/nfs/nfs_socket.c +++ b/sys/vfs/nfs/nfs_socket.c @@ -319,8 +319,8 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) error = soreserve(so, nfs_soreserve, nfs_soreserve, NULL); if (error) goto bad; - so->so_rcv.ssb_flags |= SSB_NOINTR; - so->so_snd.ssb_flags |= SSB_NOINTR; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR); + atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR); /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = diff --git a/sys/vfs/nfs/nfs_syscalls.c b/sys/vfs/nfs/nfs_syscalls.c index 99d279d0a1..8c71179dab 100644 --- a/sys/vfs/nfs/nfs_syscalls.c +++ b/sys/vfs/nfs/nfs_syscalls.c @@ -391,9 +391,9 @@ nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td) val = 1; sosetopt(so, &sopt); } - so->so_rcv.ssb_flags &= ~SSB_NOINTR; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR); so->so_rcv.ssb_timeo = 0; - so->so_snd.ssb_flags &= ~SSB_NOINTR; + atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR); so->so_snd.ssb_timeo = 0; slp = (struct nfssvc_sock *)kmalloc(sizeof (struct nfssvc_sock), @@ -410,7 +410,7 @@ nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td) crit_enter(); so->so_upcallarg = (caddr_t)slp; so->so_upcall = nfsrv_rcv; - so->so_rcv.ssb_flags |= SSB_UPCALL; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL); slp->ns_flag = (SLP_VALID | SLP_NEEDQ); nfsrv_wakenfsd(slp, 1); crit_exit(); @@ -714,7 +714,7 @@ nfsrv_zapsock(struct nfssvc_sock *slp) if (fp) { slp->ns_fp = NULL; so = slp->ns_so; - so->so_rcv.ssb_flags &= ~SSB_UPCALL; + atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL); so->so_upcall = NULL; so->so_upcallarg = NULL; soshutdown(so, SHUT_RDWR); diff --git a/sys/vfs/portal/portal_vnops.c b/sys/vfs/portal/portal_vnops.c index 2425f6ac14..3951b602a4 100644 --- a/sys/vfs/portal/portal_vnops.c +++ b/sys/vfs/portal/portal_vnops.c @@ -298,8 +298,8 @@ portal_open(struct vop_open_args *ap) */ so->so_rcv.ssb_timeo = 0; so->so_snd.ssb_timeo = 0; - so->so_rcv.ssb_flags |= SSB_NOINTR; - so->so_snd.ssb_flags |= SSB_NOINTR; + atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR); + atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR); pcred.pcr_flag = ap->a_mode; -- 2.41.0