kernel - Fix numerous MP issues with sockbuf's and ssb_flags part 1/2
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 6 Sep 2010 20:34:01 +0000 (13:34 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 6 Sep 2010 20:34:01 +0000 (13:34 -0700)
* Use atomic ops for ssb_flags handling

* Use atomic_cmpset_int() to interlock SSB_LOCK with SSB_WANT, and
  SSB_WAIT with SSB_WAKEUP.

  Note in particular that WAIT/WAKEUP assumes the client side of the
  socket is single threaded via an appropriate lock.  This needs more
  work.

18 files changed:
sys/kern/sys_socket.c
sys/kern/uipc_msg.c
sys/kern/uipc_socket.c
sys/kern/uipc_socket2.c
sys/kern/uipc_usrreq.c
sys/kern/vfs_aio.c
sys/net/accf_data/accf_data.c
sys/net/accf_http/accf_http.c
sys/netgraph/ksocket/ng_ksocket.c
sys/netinet/tcp_input.c
sys/netinet/tcp_output.c
sys/netinet/tcp_usrreq.c
sys/netproto/smb/smb_trantcp.c
sys/sys/socketvar.h
sys/sys/socketvar2.h
sys/vfs/nfs/nfs_socket.c
sys/vfs/nfs/nfs_syscalls.c
sys/vfs/portal/portal_vnops.c

index 6990da6..0b0eeb0 100644 (file)
@@ -137,12 +137,12 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data,
        case FIOASYNC:
                if (*(int *)data) {
                        so->so_state |= SS_ASYNC;
-                       so->so_rcv.ssb_flags |= SSB_ASYNC;
-                       so->so_snd.ssb_flags |= SSB_ASYNC;
+                       atomic_set_int(&so->so_rcv.ssb_flags,  SSB_ASYNC);
+                       atomic_set_int(&so->so_snd.ssb_flags, SSB_ASYNC);
                } else {
                        so->so_state &= ~SS_ASYNC;
-                       so->so_rcv.ssb_flags &= ~SSB_ASYNC;
-                       so->so_snd.ssb_flags &= ~SSB_ASYNC;
+                       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_ASYNC);
+                       atomic_clear_int(&so->so_snd.ssb_flags, SSB_ASYNC);
                }
                error = 0;
                break;
index f1a5b05..02a895f 100644 (file)
@@ -623,8 +623,8 @@ netmsg_so_notify(netmsg_t netmsg)
        } else {
                lwkt_gettoken(&kq_token);
                TAILQ_INSERT_TAIL(&ssb->ssb_kq.ki_mlist, msg, nm_list);
+               atomic_set_int(&ssb->ssb_flags, SSB_MEVENT);
                lwkt_reltoken(&kq_token);
-               ssb->ssb_flags |= SSB_MEVENT;
        }
 }
 
index e170714..12bf054 100644 (file)
@@ -1178,19 +1178,23 @@ sorflush(struct socket *so)
        struct protosw *pr = so->so_proto;
        struct signalsockbuf asb;
 
-       ssb->ssb_flags |= SSB_NOINTR;
-       (void) ssb_lock(ssb, M_WAITOK);
+       atomic_set_int(&ssb->ssb_flags, SSB_NOINTR);
 
-       crit_enter();
+       ssb_lock(ssb, M_WAITOK);
        socantrcvmore(so);
-       ssb_unlock(ssb);
        asb = *ssb;
-       bzero((caddr_t)ssb, sizeof (*ssb));
-       if (asb.ssb_flags & SSB_KNOTE) {
-               ssb->ssb_kq.ki_note = asb.ssb_kq.ki_note;
-               ssb->ssb_flags |= SSB_KNOTE;
-       }
-       crit_exit();
+
+       /*
+        * Can't just blow up the ssb structure here
+        */
+       ssb->ssb_timeo = 0;
+       ssb->ssb_unused01 = 0;
+       ssb->ssb_lowat = 0;
+       ssb->ssb_hiwat = 0;
+       ssb->ssb_mbmax = 0;
+       atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK);
+
+       ssb_unlock(ssb);
 
        if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(asb.ssb_mb);
@@ -1317,6 +1321,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
        struct  linger l;
        struct  timeval tv;
        u_long  val;
+       struct signalsockbuf *sotmp;
 
        error = 0;
        sopt->sopt_dir = SOPT_SET;
@@ -1393,8 +1398,10 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                                        error = ENOBUFS;
                                        goto bad;
                                }
-                               (sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
-                                   &so->so_rcv)->ssb_flags &= ~SSB_AUTOSIZE;
+                               sotmp = (sopt->sopt_name == SO_SNDBUF) ?
+                                               &so->so_snd : &so->so_rcv;
+                               atomic_clear_int(&sotmp->ssb_flags,
+                                                SSB_AUTOSIZE);
                                break;
 
                        /*
@@ -1405,13 +1412,15 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                                so->so_snd.ssb_lowat =
                                    (optval > so->so_snd.ssb_hiwat) ?
                                    so->so_snd.ssb_hiwat : optval;
-                               so->so_snd.ssb_flags &= ~SSB_AUTOLOWAT;
+                               atomic_clear_int(&so->so_snd.ssb_flags,
+                                                SSB_AUTOLOWAT);
                                break;
                        case SO_RCVLOWAT:
                                so->so_rcv.ssb_lowat =
                                    (optval > so->so_rcv.ssb_hiwat) ?
                                    so->so_rcv.ssb_hiwat : optval;
-                               so->so_rcv.ssb_flags &= ~SSB_AUTOLOWAT;
+                               atomic_clear_int(&so->so_rcv.ssb_flags,
+                                                SSB_AUTOLOWAT);
                                break;
                        }
                        break;
@@ -1728,7 +1737,7 @@ sokqfilter(struct file *fp, struct knote *kn)
        }
 
        knote_insert(&ssb->ssb_kq.ki_note, kn);
-       ssb->ssb_flags |= SSB_KNOTE;
+       atomic_set_int(&ssb->ssb_flags, SSB_KNOTE);
        return (0);
 }
 
@@ -1739,7 +1748,7 @@ filt_sordetach(struct knote *kn)
 
        knote_remove(&so->so_rcv.ssb_kq.ki_note, kn);
        if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note))
-               so->so_rcv.ssb_flags &= ~SSB_KNOTE;
+               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE);
 }
 
 /*ARGSUSED*/
@@ -1780,7 +1789,7 @@ filt_sowdetach(struct knote *kn)
 
        knote_remove(&so->so_snd.ssb_kq.ki_note, kn);
        if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note))
-               so->so_snd.ssb_flags &= ~SSB_KNOTE;
+               atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE);
 }
 
 /*ARGSUSED*/
index 05eb2d0..4ad52b7 100644 (file)
@@ -76,16 +76,51 @@ static      u_long sb_efficiency = 8;       /* parameter for sbreserve() */
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
+ *
+ * NOTE: Caller must generally hold the ssb_lock (client side lock) since
+ *      WAIT/WAKEUP only works for one client at a time.
+ *
+ * NOTE: Caller always retries whatever operation it was waiting on.
  */
 int
 ssb_wait(struct signalsockbuf *ssb)
 {
+       uint32_t flags;
+       int pflags;
+       int error;
+
+       pflags = (ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH;
+
+       for (;;) {
+               flags = ssb->ssb_flags;
+               cpu_ccfence();
+
+               /*
+                * WAKEUP and WAIT interlock eachother.  We can catch the
+                * race by checking to see if WAKEUP has already been set,
+                * and only setting WAIT if WAKEUP is clear.
+                */
+               if (flags & SSB_WAKEUP) {
+                       if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                             flags & ~SSB_WAKEUP)) {
+                               error = 0;
+                               break;
+                       }
+                       continue;
+               }
 
-       ssb->ssb_flags |= SSB_WAIT;
-       return (tsleep((caddr_t)&ssb->ssb_cc,
-                       ((ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH),
-                       "sbwait",
-                       ssb->ssb_timeo));
+               /*
+                * Only set WAIT if WAKEUP is clear.
+                */
+               tsleep_interlock(&ssb->ssb_cc, pflags);
+               if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                     flags | SSB_WAIT)) {
+                       error = tsleep(&ssb->ssb_cc, pflags | PINTERLOCKED,
+                                      "sbwait", ssb->ssb_timeo);
+                       break;
+               }
+       }
+       return (error);
 }
 
 /*
@@ -95,18 +130,34 @@ ssb_wait(struct signalsockbuf *ssb)
 int
 _ssb_lock(struct signalsockbuf *ssb)
 {
+       uint32_t flags;
+       int pflags;
        int error;
 
-       while (ssb->ssb_flags & SSB_LOCK) {
-               ssb->ssb_flags |= SSB_WANT;
-               error = tsleep((caddr_t)&ssb->ssb_flags,
-                           ((ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH),
-                           "sblock", 0);
-               if (error)
-                       return (error);
+       pflags = (ssb->ssb_flags & SSB_NOINTR) ? 0 : PCATCH;
+
+       for (;;) {
+               flags = ssb->ssb_flags;
+               cpu_ccfence();
+               if (flags & SSB_LOCK) {
+                       tsleep_interlock(&ssb->ssb_flags, pflags);
+                       if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                             flags | SSB_WANT)) {
+                               error = tsleep(&ssb->ssb_flags,
+                                              pflags | PINTERLOCKED,
+                                              "sblock", 0);
+                               if (error)
+                                       break;
+                       }
+               } else {
+                       if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                             flags | SSB_LOCK)) {
+                               error = 0;
+                               break;
+                       }
+               }
        }
-       ssb->ssb_flags |= SSB_LOCK;
-       return (0);
+       return (error);
 }
 
 /*
@@ -176,7 +227,7 @@ soisconnected(struct socket *so)
                if ((so->so_options & SO_ACCEPTFILTER) != 0) {
                        so->so_upcall = head->so_accf->so_accept_filter->accf_callback;
                        so->so_upcallarg = head->so_accf->so_accept_filter_arg;
-                       so->so_rcv.ssb_flags |= SSB_UPCALL;
+                       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
                        so->so_options &= ~SO_ACCEPTFILTER;
                        so->so_upcall(so, so->so_upcallarg, 0);
                        return;
@@ -350,17 +401,36 @@ void
 sowakeup(struct socket *so, struct signalsockbuf *ssb)
 {
        struct kqinfo *kqinfo = &ssb->ssb_kq;
+       uint32_t flags;
+
+       /*
+        * Check conditions, set the WAKEUP flag, and clear and signal if
+        * the WAIT flag is found to be set.  This interlocks against the
+        * client side.
+        */
+       for (;;) {
+               flags = ssb->ssb_flags;
+               cpu_ccfence();
 
-       if (ssb->ssb_flags & SSB_WAIT) {
                if ((ssb == &so->so_snd && ssb_space(ssb) >= ssb->ssb_lowat) ||
                    (ssb == &so->so_rcv && ssb->ssb_cc >= ssb->ssb_lowat) ||
                    (ssb == &so->so_snd && (so->so_state & SS_CANTSENDMORE)) ||
                    (ssb == &so->so_rcv && (so->so_state & SS_CANTRCVMORE))
                ) {
-                       ssb->ssb_flags &= ~SSB_WAIT;
-                       wakeup((caddr_t)&ssb->ssb_cc);
+                       if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                         (flags | SSB_WAKEUP) & ~SSB_WAIT)) {
+                               if (flags & SSB_WAIT)
+                                       wakeup(&ssb->ssb_cc);
+                               break;
+                       }
+               } else {
+                       break;
                }
        }
+
+       /*
+        * Misc other events
+        */
        if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
                pgsigio(so->so_sigio, SIGIO, 0);
        if (ssb->ssb_flags & SSB_UPCALL)
@@ -379,7 +449,7 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb)
                        }
                }
                if (TAILQ_EMPTY(&ssb->ssb_kq.ki_mlist))
-                       ssb->ssb_flags &= ~SSB_MEVENT;
+                       atomic_clear_int(&ssb->ssb_flags, SSB_MEVENT);
        }
 }
 
@@ -418,7 +488,7 @@ int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc, struct rlimit *rl)
 {
        if (so->so_snd.ssb_lowat == 0)
-               so->so_snd.ssb_flags |= SSB_AUTOLOWAT;
+               atomic_set_int(&so->so_snd.ssb_flags, SSB_AUTOLOWAT);
        if (ssb_reserve(&so->so_snd, sndcc, so, rl) == 0)
                goto bad;
        if (ssb_reserve(&so->so_rcv, rcvcc, so, rl) == 0)
index c0712dd..5ec5ea5 100644 (file)
@@ -256,7 +256,7 @@ uipc_rcvd(struct socket *so, int flags)
                if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
                    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
                ) {
-                       so2->so_snd.ssb_flags &= ~SSB_STOP;
+                       atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);
                        sowwakeup(so2);
                }
                break;
@@ -376,7 +376,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
                if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
                    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
                ) {
-                       so->so_snd.ssb_flags |= SSB_STOP;
+                       atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
                }
                sorwakeup(so2);
                break;
index 3e61e85..d5a66fd 100644 (file)
@@ -436,8 +436,10 @@ aio_proc_rundown(struct proc *p)
                        so = (struct socket *)fp->f_data;
                        TAILQ_REMOVE(&so->so_aiojobq, aiocbe, list);
                        if (TAILQ_EMPTY(&so->so_aiojobq)) {
-                               so->so_snd.ssb_flags &= ~SSB_AIO;
-                               so->so_rcv.ssb_flags &= ~SSB_AIO;
+                               atomic_clear_int(&so->so_snd.ssb_flags,
+                                                SSB_AIO);
+                               atomic_clear_int(&so->so_rcv.ssb_flags,
+                                                SSB_AIO);
                        }
                }
                TAILQ_REMOVE(&ki->kaio_sockqueue, aiocbe, plist);
@@ -1048,10 +1050,10 @@ aio_swake(struct socket *so, struct signalsockbuf *ssb)
 
        if (ssb == &so->so_snd) {
                opcode = LIO_WRITE;
-               so->so_snd.ssb_flags &= ~SSB_AIO;
+               atomic_clear_int(&so->so_snd.ssb_flags, SSB_AIO);
        } else {
                opcode = LIO_READ;
-               so->so_rcv.ssb_flags &= ~SSB_AIO;
+               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AIO);
        }
 
        for (cb = TAILQ_FIRST(&so->so_aiojobq); cb; cb = cbn) {
@@ -1255,9 +1257,9 @@ no_kqueue:
                        TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list);
                        TAILQ_INSERT_TAIL(&ki->kaio_sockqueue, aiocbe, plist);
                        if (opcode == LIO_READ)
-                               so->so_rcv.ssb_flags |= SSB_AIO;
+                               atomic_set_int(&so->so_rcv.ssb_flags, SSB_AIO);
                        else
-                               so->so_snd.ssb_flags |= SSB_AIO;
+                               atomic_set_int(&so->so_snd.ssb_flags, SSB_AIO);
                        aiocbe->jobstate = JOBST_JOBQGLOBAL; /* XXX */
                        ki->kaio_queue_count++;
                        num_queue_count++;
index 8bc978c..50da84d 100644 (file)
@@ -76,7 +76,7 @@ sohasdata(struct socket *so, void *arg, int waitflag)
        }
 
        so->so_upcall = NULL;
-       so->so_rcv.ssb_flags &= ~SSB_UPCALL;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        soisconnected(so);
        return;
 }
index 1b750c0..2592028 100644 (file)
@@ -217,7 +217,7 @@ sohashttpget(struct socket *so, void *arg, int waitflag)
 fallout:
        DPRINT("fallout");
        so->so_upcall = NULL;
-       so->so_rcv.ssb_flags &= ~SSB_UPCALL;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        soisconnected(so);
        return;
 }
@@ -283,13 +283,13 @@ readmore:
         * we don't understand or a newline, so try again
         */
        so->so_upcall = soparsehttpvers;
-       so->so_rcv.ssb_flags |= SSB_UPCALL;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        return;
 
 fallout:
        DPRINT("fallout");
        so->so_upcall = NULL;
-       so->so_rcv.ssb_flags &= ~SSB_UPCALL;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        soisconnected(so);
        return;
 }
@@ -354,12 +354,12 @@ soishttpconnected(struct socket *so, void *arg, int waitflag)
 
 readmore:
        so->so_upcall = soishttpconnected;
-       so->so_rcv.ssb_flags |= SSB_UPCALL;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        return;
 
 gotit:
        so->so_upcall = NULL;
-       so->so_rcv.ssb_flags &= ~SSB_UPCALL;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        soisconnected(so);
        return;
 }
index 334023c..3cc3c73 100644 (file)
@@ -612,8 +612,8 @@ ng_ksocket_newhook(node_p node, hook_p hook, const char *name0)
                /* Add our hook for incoming data and other events */
                priv->so->so_upcallarg = (caddr_t)node;
                priv->so->so_upcall = ng_ksocket_incoming;
-               priv->so->so_rcv.ssb_flags |= SSB_UPCALL;
-               priv->so->so_snd.ssb_flags |= SSB_UPCALL;
+               atomic_set_int(&priv->so->so_rcv.ssb_flags, SSB_UPCALL);
+               atomic_set_int(&priv->so->so_snd.ssb_flags, SSB_UPCALL);
        }
 
        /* OK */
@@ -936,8 +936,8 @@ ng_ksocket_rmnode(node_p node)
        /* Close our socket (if any) */
        if (priv->so != NULL) {
                priv->so->so_upcall = NULL;
-               priv->so->so_rcv.ssb_flags &= ~SSB_UPCALL;
-               priv->so->so_snd.ssb_flags &= ~SSB_UPCALL;
+               atomic_clear_int(&priv->so->so_rcv.ssb_flags, SSB_UPCALL);
+               atomic_clear_int(&priv->so->so_snd.ssb_flags, SSB_UPCALL);
                soclose(priv->so, FNONBLOCK);
                priv->so = NULL;
        }
@@ -1206,8 +1206,8 @@ ng_ksocket_finish_accept(priv_p priv, struct ng_mesg **rptr)
 
        so->so_upcallarg = (caddr_t)node2;
        so->so_upcall = ng_ksocket_incoming;
-       so->so_rcv.ssb_flags |= SSB_UPCALL;
-       so->so_snd.ssb_flags |= SSB_UPCALL;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
+       atomic_set_int(&so->so_snd.ssb_flags, SSB_UPCALL);
 
        /* Fill in the response data and send it or return it to the caller */
        resp_data = (struct ng_ksocket_accept *)resp->data;
index c978d86..ab9f352 100644 (file)
@@ -1345,11 +1345,11 @@ after_listen:
                                        tp->t_flags |= TF_RXRESIZED;
                                        if (!ssb_reserve(&so->so_rcv, newsize,
                                                         so, NULL)) {
-                                               so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE;
+                                               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE);
                                        }
                                        if (newsize >=
                                            (TCP_MAXWIN << tp->rcv_scale)) {
-                                               so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE;
+                                               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE);
                                        }
                                }
                                m_adj(m, drop_hdrlen); /* delayed header drop */
index 6268cba..f14f7bb 100644 (file)
@@ -373,9 +373,9 @@ again:
                                         tcp_autosndbuf_inc,
                                        tcp_autosndbuf_max);
                        if (!ssb_reserve(&so->so_snd, newsize, so, NULL))
-                               so->so_snd.ssb_flags &= ~SSB_AUTOSIZE;
+                               atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE);
                        if (newsize >= (TCP_MAXWIN << tp->snd_scale))
-                               so->so_snd.ssb_flags &= ~SSB_AUTOSIZE;
+                               atomic_clear_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE);
                }
        }
 
index b3a456f..2493b25 100644 (file)
@@ -1384,8 +1384,8 @@ tcp_attach(struct socket *so, struct pru_attach_info *ai)
                if (error)
                        return (error);
        }
-       so->so_rcv.ssb_flags |= SSB_AUTOSIZE;
-       so->so_snd.ssb_flags |= SSB_AUTOSIZE;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE);
+       atomic_set_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE);
        cpu = mycpu->gd_cpuid;
        error = in_pcballoc(so, &tcbinfo[cpu]);
        if (error)
index aa57254..1f8c551 100644 (file)
@@ -153,7 +153,7 @@ nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct thread *td)
        nbp->nbp_tso = so;
        so->so_upcallarg = (caddr_t)nbp;
        so->so_upcall = nb_upcall;
-       so->so_rcv.ssb_flags |= SSB_UPCALL;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        so->so_rcv.ssb_timeo = (5 * hz);
        so->so_snd.ssb_timeo = (5 * hz);
        error = soreserve(so, nbp->nbp_sndbuf, nbp->nbp_rcvbuf,
@@ -162,16 +162,16 @@ nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct thread *td)
                goto bad;
        nb_setsockopt_int(so, SOL_SOCKET, SO_KEEPALIVE, 1);
        nb_setsockopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1);
-       so->so_rcv.ssb_flags &= ~SSB_NOINTR;
-       so->so_snd.ssb_flags &= ~SSB_NOINTR;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
+       atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR);
        error = soconnect(so, (struct sockaddr*)to, td);
 
        /*
         * If signals are allowed nbssn_recv() can wind up in a hard loop
         * on EWOULDBLOCK. 
         */
-       so->so_rcv.ssb_flags |= SSB_NOINTR;
-       so->so_snd.ssb_flags |= SSB_NOINTR;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
+       atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR);
        if (error)
                goto bad;
        crit_enter();
index cc999e6..1c99678 100644 (file)
@@ -58,12 +58,16 @@ struct accept_filter;
 /*
  * Signaling socket buffers contain additional elements for locking
  * and signaling conditions.  These are used primarily by sockets.
+ *
+ * WARNING: See partial clearing of fields in kern/uipc_socket.c
+ *         sorflush() and sowflush().
  */
 struct signalsockbuf {
        struct sockbuf sb;
        struct kqinfo ssb_kq;   /* process selecting read/write */
-       short   ssb_flags;      /* flags, see below */
+       uint32_t ssb_flags;     /* flags, see below (use atomic ops) */
        short   ssb_timeo;      /* timeout for read/write */
+       short   ssb_unused01;
        long    ssb_lowat;      /* low water mark */
        u_long  ssb_hiwat;      /* high water mark / max actual char count */
        u_long  ssb_mbmax;      /* max chars of mbufs to use */
@@ -73,18 +77,25 @@ struct signalsockbuf {
 #define ssb_mb         sb.sb_mb        /* commonly used fields */
 #define ssb_mbcnt      sb.sb_mbcnt     /* commonly used fields */
 
-#define        SSB_LOCK        0x01            /* lock on data queue */
-#define        SSB_WANT        0x02            /* someone is waiting to lock */
-#define        SSB_WAIT        0x04            /* someone is waiting for data/space */
-#define        SSB_ASYNC       0x10            /* ASYNC I/O, need signals */
-#define        SSB_UPCALL      0x20            /* someone wants an upcall */
-#define        SSB_NOINTR      0x40            /* operations not interruptible */
-#define SSB_AIO                0x80            /* AIO operations queued */
-#define SSB_KNOTE      0x100           /* kernel note attached */
-#define SSB_MEVENT     0x200           /* need message event notification */
-#define SSB_STOP       0x400           /* backpressure indicator */
-#define        SSB_AUTOSIZE    0x800           /* automatically size socket buffer */
+#define        SSB_LOCK        0x0001          /* lock on data queue */
+#define        SSB_WANT        0x0002          /* someone is waiting to lock */
+#define        SSB_WAIT        0x0004          /* someone is waiting for data/space */
+#define        SSB_ASYNC       0x0010          /* ASYNC I/O, need signals */
+#define        SSB_UPCALL      0x0020          /* someone wants an upcall */
+#define        SSB_NOINTR      0x0040          /* operations not interruptible */
+#define SSB_AIO                0x0080          /* AIO operations queued */
+#define SSB_KNOTE      0x0100          /* kernel note attached */
+#define SSB_MEVENT     0x0200          /* need message event notification */
+#define SSB_STOP       0x0400          /* backpressure indicator */
+#define        SSB_AUTOSIZE    0x0800          /* automatically size socket buffer */
 #define SSB_AUTOLOWAT  0x1000          /* automatically scale lowat */
+#define SSB_WAKEUP     0x2000          /* wakeup event race */
+
+#define SSB_CLEAR_MASK (SSB_ASYNC | SSB_UPCALL | SSB_STOP | \
+                        SSB_AUTOSIZE | SSB_AUTOLOWAT)
+
+#define SSB_NOTIFY_MASK        (SSB_WAIT | SSB_ASYNC | SSB_UPCALL | \
+                        SSB_AIO | SSB_KNOTE | SSB_MEVENT)
 
 /*
  * Per-socket kernel structure.  Contains universal send and receive queues,
@@ -211,11 +222,16 @@ struct    xsocket {
 
 /*
  * Do we need to notify the other side when I/O is possible?
+ *
+ * NOTE: Interlock for ssb_wait/wakeup.  The protocol side will set
+ *      SSB_WAKEUP asynchronously and this can race, so if it isn't
+ *      set we have to go through the full-on notification check.
+ *      If it is set but no waiting ever takes place it simply
+ *      remains set.
  */
-#define        ssb_notify(ssb)                                 \
-       (((ssb)->ssb_flags &                            \
-       (SSB_WAIT | SSB_ASYNC | SSB_UPCALL |            \
-       SSB_AIO | SSB_KNOTE | SSB_MEVENT)))
+#define ssb_notify(ssb)                                        \
+           (((ssb)->ssb_flags & SSB_NOTIFY_MASK) ||    \
+            ((ssb)->ssb_flags & SSB_WAKEUP) == 0)
 
 /* do we have to send all at once on a socket? */
 
@@ -262,29 +278,27 @@ ssb_space(struct signalsockbuf *ssb)
        ((ssb_space(ssb) <= 0) ? 0 : sbappendcontrol(&(ssb)->sb, m, control))
 
 #define ssb_insert_knote(ssb, kn) {                                    \
-       lwkt_gettoken(&kq_token);                                       \
-        SLIST_INSERT_HEAD(&(ssb)->ssb_kq.ki_note, kn, kn_next);                \
-       lwkt_reltoken(&kq_token);                                       \
-       (ssb)->ssb_flags |= SSB_KNOTE;                                  \
+       knote_insert(&(ssb)->ssb_kq.ki_note, kn);                       \
+       atomic_set_int(&(ssb)->ssb_flags, SSB_KNOTE);                   \
 }
 
 #define ssb_remove_knote(ssb, kn) {                                    \
-       lwkt_gettoken(&kq_token);                                       \
-        SLIST_REMOVE(&(ssb)->ssb_kq.ki_note, kn, knote, kn_next);      \
+       knote_remove(&(ssb)->ssb_kq.ki_note, kn);                       \
        if (SLIST_EMPTY(&(ssb)->ssb_kq.ki_note))                        \
-               (ssb)->ssb_flags &= ~SSB_KNOTE;                         \
-       lwkt_reltoken(&kq_token);                                       \
+               atomic_clear_int(&(ssb)->ssb_flags, SSB_KNOTE);         \
 }
 
-#define        sorwakeup(so)   do { \
-                         if (ssb_notify(&(so)->so_rcv)) \
-                           sowakeup((so), &(so)->so_rcv); \
-                       } while (0)
-
-#define        sowwakeup(so)   do { \
-                         if (ssb_notify(&(so)->so_snd)) \
-                           sowakeup((so), &(so)->so_snd); \
-                       } while (0)
+#define        sorwakeup(so)                                           \
+       do {                                                    \
+               if (ssb_notify(&(so)->so_rcv))                  \
+                       sowakeup((so), &(so)->so_rcv);          \
+       } while (0)
+
+#define        sowwakeup(so)                                           \
+       do {                                                    \
+               if (ssb_notify(&(so)->so_snd))                  \
+                       sowakeup((so), &(so)->so_snd);          \
+       } while (0)
 
 #ifdef _KERNEL
 
index 51c5d1a..899a7b2 100644 (file)
@@ -47,6 +47,9 @@
 #ifndef _SYS_MALLOC_H_
 #include <sys/malloc.h>
 #endif
+#ifndef _MACHINE_ATOMIC_H_
+#include <machine/atomic.h>
+#endif
 
 /*
  * Acquire a lock on a signalsockbuf, sleep if the lock is already held.
 static __inline int
 ssb_lock(struct signalsockbuf *ssb, int wf)
 {
-       if (ssb->ssb_flags & SSB_LOCK) {
-               if (wf == M_WAITOK)
-                       return _ssb_lock(ssb);
-               return EWOULDBLOCK;
-       } else {
-               ssb->ssb_flags |= SSB_LOCK;
-               return 0;
+       uint32_t flags;
+
+       for (;;) {
+               flags = ssb->ssb_flags;
+               cpu_ccfence();
+               if (flags & SSB_LOCK) {
+                       if (wf == M_WAITOK)
+                               return _ssb_lock(ssb);
+                       return EWOULDBLOCK;
+               }
+               if (atomic_cmpset_int(&ssb->ssb_flags, flags, flags | SSB_LOCK))
+                       return(0);
        }
 }
 
 /*
  * Release a previously acquired lock on a signalsockbuf.
+ *
+ * Interlocked wakeup if SSB_WANT was also set.
  */
 static __inline void
 ssb_unlock(struct signalsockbuf *ssb)
 {
-       ssb->ssb_flags &= ~SSB_LOCK;
-       if (ssb->ssb_flags & SSB_WANT) {
-               ssb->ssb_flags &= ~SSB_WANT;
-               wakeup(&ssb->ssb_flags);
+       uint32_t flags;
+
+       KKASSERT(ssb->ssb_flags & SSB_LOCK);
+       for (;;) {
+               flags = ssb->ssb_flags;
+               cpu_ccfence();
+               if (atomic_cmpset_int(&ssb->ssb_flags, flags,
+                                     flags & ~(SSB_LOCK | SSB_WANT))) {
+                       if (flags & SSB_WANT)
+                               wakeup(&ssb->ssb_flags);
+                       break;
+               }
        }
 }
 
index c241c3c..f286097 100644 (file)
@@ -319,8 +319,8 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
        error = soreserve(so, nfs_soreserve, nfs_soreserve, NULL);
        if (error)
                goto bad;
-       so->so_rcv.ssb_flags |= SSB_NOINTR;
-       so->so_snd.ssb_flags |= SSB_NOINTR;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
+       atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR);
 
        /* Initialize other non-zero congestion variables */
        nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = 
index 99d279d..8c71179 100644 (file)
@@ -391,9 +391,9 @@ nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
                val = 1;
                sosetopt(so, &sopt);
        }
-       so->so_rcv.ssb_flags &= ~SSB_NOINTR;
+       atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
        so->so_rcv.ssb_timeo = 0;
-       so->so_snd.ssb_flags &= ~SSB_NOINTR;
+       atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR);
        so->so_snd.ssb_timeo = 0;
 
        slp = (struct nfssvc_sock *)kmalloc(sizeof (struct nfssvc_sock),
@@ -410,7 +410,7 @@ nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
        crit_enter();
        so->so_upcallarg = (caddr_t)slp;
        so->so_upcall = nfsrv_rcv;
-       so->so_rcv.ssb_flags |= SSB_UPCALL;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
        slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
        nfsrv_wakenfsd(slp, 1);
        crit_exit();
@@ -714,7 +714,7 @@ nfsrv_zapsock(struct nfssvc_sock *slp)
        if (fp) {
                slp->ns_fp = NULL;
                so = slp->ns_so;
-               so->so_rcv.ssb_flags &= ~SSB_UPCALL;
+               atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
                so->so_upcall = NULL;
                so->so_upcallarg = NULL;
                soshutdown(so, SHUT_RDWR);
index 2425f6a..3951b60 100644 (file)
@@ -298,8 +298,8 @@ portal_open(struct vop_open_args *ap)
         */
        so->so_rcv.ssb_timeo = 0;
        so->so_snd.ssb_timeo = 0;
-       so->so_rcv.ssb_flags |= SSB_NOINTR;
-       so->so_snd.ssb_flags |= SSB_NOINTR;
+       atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
+       atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR);
 
 
        pcred.pcr_flag = ap->a_mode;