udp: Fix bind races due to async close and random socket inital msgport
authorSepherosa Ziehau <sephe@dragonflybsd.org>
Sun, 15 Nov 2015 14:02:55 +0000 (22:02 +0800)
committerSepherosa Ziehau <sephe@dragonflybsd.org>
Sun, 15 Nov 2015 14:24:00 +0000 (22:24 +0800)
Bind for UDP sockets is now serialized by local port based netisr.  And
on detach path, the UDP inpcb is removed from local port hash before
other netisrs are synchronized, so that the local port for the detaching
UDP inpcb could be recycled timely.

sys/netinet/in_pcb.c
sys/netinet/in_pcb.h
sys/netinet/udp_usrreq.c

index 351a131..0aefe6d 100644 (file)
@@ -1753,6 +1753,35 @@ in_pcbinsporthash_lport(struct inpcb *inp)
        REL_PORT_TOKEN(portinfo);
 }
 
+void
+in_pcbremporthash(struct inpcb *inp)
+{
+       struct inpcbportinfo *portinfo;
+       struct inpcbport *phd;
+
+       if (inp->inp_lport == 0)
+               return;
+
+       /*
+        * NOTE:
+        * inp->inp_portinfo is _not_ necessary same as
+        * inp->inp_pcbinfo->portinfo.
+        */
+       portinfo = inp->inp_portinfo;
+       GET_PORT_TOKEN(portinfo);
+
+       phd = inp->inp_phd;
+       LIST_REMOVE(inp, inp_portlist);
+       if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+               LIST_REMOVE(phd, phd_hash);
+               kfree(phd, M_PCB);
+       }
+
+       REL_PORT_TOKEN(portinfo);
+
+       inp->inp_lport = 0;
+}
+
 static struct inp_localgroup *
 inp_localgroup_alloc(u_char af, uint16_t port,
     const union in_dependaddr *addr, int size)
@@ -2097,27 +2126,7 @@ in_pcbremwildcardhash(struct inpcb *inp)
 void
 in_pcbremlists(struct inpcb *inp)
 {
-       if (inp->inp_lport) {
-               struct inpcbportinfo *portinfo;
-               struct inpcbport *phd;
-
-               /*
-                * NOTE:
-                * inp->inp_portinfo is _not_ necessary same as
-                * inp->inp_pcbinfo->portinfo.
-                */
-               portinfo = inp->inp_portinfo;
-               GET_PORT_TOKEN(portinfo);
-
-               phd = inp->inp_phd;
-               LIST_REMOVE(inp, inp_portlist);
-               if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
-                       LIST_REMOVE(phd, phd_hash);
-                       kfree(phd, M_PCB);
-               }
-
-               REL_PORT_TOKEN(portinfo);
-       }
+       in_pcbremporthash(inp);
        if (inp->inp_flags & INP_WILDCARD) {
                in_pcbremwildcardhash(inp);
        } else if (inp->inp_flags & INP_CONNECTED) {
index 64fb977..693a727 100644 (file)
@@ -513,6 +513,7 @@ void        in_pcbinswildcardhash_oncpu(struct inpcb *, struct inpcbinfo *);
 void   in_pcbinsconnhash(struct inpcb *inp);
 void   in_pcbinsporthash (struct inpcbportinfo *, struct inpcb *);
 void   in_pcbinsporthash_lport (struct inpcb *);
+void   in_pcbremporthash (struct inpcb *);
 int    in_pcbladdr (struct inpcb *, struct sockaddr *,
            struct sockaddr_in **, struct thread *);
 int    in_pcbladdr_find (struct inpcb *, struct sockaddr *,
index 55c2f3a..3d1b977 100644 (file)
@@ -195,6 +195,12 @@ static boolean_t udp_inswildcardhash(struct inpcb *inp,
     struct netmsg_base *msg, int error);
 static void udp_remwildcardhash(struct inpcb *inp);
 
+static __inline int
+udp_lportcpu(short lport)
+{
+       return (ntohs(lport) & ncpus2_mask);
+}
+
 void
 udp_init(void)
 {
@@ -1320,8 +1326,7 @@ udp_inswildcardhash_dispatch(netmsg_t msg)
        boolean_t forwarded;
 
        KASSERT(inp->inp_lport != 0, ("local port not set yet"));
-       KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid,
-           ("not target cpu"));
+       KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu"));
 
        in_pcblink(inp, &udbinfo[mycpuid]);
 
@@ -1351,7 +1356,7 @@ udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error)
        in_pcbresetroute(inp);
 
        KASSERT(inp->inp_lport != 0, ("local port not set yet"));
-       cpu = ntohs(inp->inp_lport) & ncpus2_mask;
+       cpu = udp_lportcpu(inp->inp_lport);
 
        lmsg->ms_error = error;
        if (cpu != mycpuid) {
@@ -1385,10 +1390,65 @@ udp_bind(netmsg_t msg)
        if (inp) {
                struct sockaddr *nam = msg->bind.nm_nam;
                struct thread *td = msg->bind.nm_td;
+               struct sockaddr_in *sin;
+               lwkt_port_t port;
+               int cpu;
+
+               /*
+                * Check "already bound" here (in_pcbbind() does the same
+                * check though), so we don't forward a connected/bound
+                * socket randomly which would panic in the following
+                * in_pcbunlink().
+                */
+               if (inp->inp_lport != 0 ||
+                   inp->inp_laddr.s_addr != INADDR_ANY) {
+                       error = EINVAL; /* already bound */
+                       goto done;
+               }
+
+               if (nam->sa_len != sizeof(*sin)) {
+                       error = EINVAL;
+                       goto done;
+               }
+               sin = (struct sockaddr_in *)nam;
+
+               cpu = udp_lportcpu(sin->sin_port);
+               port = netisr_cpuport(cpu);
+
+               /*
+                * See the related comment in tcp_usrreq.c tcp_usr_bind().
+                * The exception is that we use local port based netisr
+                * to serialize in_pcbbind().
+                */
+               if (&curthread->td_msgport != port) {
+                       lwkt_msg_t lmsg = &msg->bind.base.lmsg;
+
+                       KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0,
+                           ("already asked to relink"));
+
+                       in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]);
+                       msg->bind.nm_flags |= PRUB_RELINK;
+
+                       /*
+                        * See the related comment in tcp_usrreq.c
+                        * tcp_connect().
+                        */
+                       lwkt_setmsg_receipt(lmsg, udp_sosetport);
+                       lwkt_forwardmsg(port, lmsg);
+                       /* msg invalid now */
+                       return;
+               }
+               KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu));
+
+               if (msg->bind.nm_flags & PRUB_RELINK) {
+                       msg->bind.nm_flags &= ~PRUB_RELINK;
+                       in_pcblink(so->so_pcb, &udbinfo[mycpuid]);
+               }
+               KASSERT(inp->inp_pcbinfo == &udbinfo[cpu],
+                   ("pcbinfo is not udbinfo%d", cpu));
 
                error = in_pcbbind(inp, nam, td);
                if (error == 0) {
-                       struct sockaddr_in *sin = (struct sockaddr_in *)nam;
                        boolean_t forwarded;
 
                        if (sin->sin_addr.s_addr != INADDR_ANY)
@@ -1407,6 +1467,7 @@ udp_bind(netmsg_t msg)
        } else {
                error = EINVAL;
        }
+done:
        lwkt_replymsg(&msg->bind.base.lmsg, error);
 }
 
@@ -1710,6 +1771,13 @@ udp_detach(netmsg_t msg)
         */
        in_pcbofflist(inp);
 
+       /*
+        * Remove this inpcb from the local port hash directly
+        * here, so that its bound local port could be recycled
+        * timely.
+        */
+       in_pcbremporthash(inp);
+
        if (inp->inp_flags & INP_DIRECT_DETACH) {
                /*
                 * Direct detaching is allowed