kernel - make POLLHUP conform better to OpenGroup
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 13 Aug 2010 22:06:12 +0000 (15:06 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 13 Aug 2010 22:06:12 +0000 (15:06 -0700)
* The poll() backend now only evaluates EV_EOF for EVFILT_WRITE, and
  generates a POLLHUP which is properly mutually exclusive with
  POLLWRNORM and POLLOUT.

  We do this even for a half-closed (write-side-closed) socket.

* We no longer set POLLHUP for POLLRD* or POLLIN, instead relying on
  userland to issue the read() and check for a 0-byte result (which
  userland has always done historically).

  EV_EOF for EVFILT_READ cannot be used to set POLLHUP for POLLRD* or
  POLLIN because it could indicate a half-closed connection
  (read-side-closed) where writing is still allowed, which is a more
  common situation on sockets.

  We would have to add another EV_ flag to set POLLHUP for POLLRD* or
  POLLIN to detect the fully disconnected state.  The OpenGroup standard
  does allow POLLHUP to be mixed with POLLRD* or POLLIN but for now we
  just don't set it at all in that case.

  Only a POLLOUT/POLLWRNORM flag request can cause POLLHUP to be set
  for now.

sys/kern/sys_generic.c

index 733326b..9a36c33 100644 (file)
@@ -1358,23 +1358,46 @@ poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
                                continue;
                        }
 
-                       if (kevp[i].flags & EV_EOF)
-                               pfd->revents |= POLLHUP;
-
                        switch (kevp[i].filter) {
                        case EVFILT_READ:
+#if 0
+                               /*
+                                * EOF on the read side can indicate a
+                                * half-closed situation and not necessarily
+                                * a disconnect, so depend on the user
+                                * issuing a read() and getting 0 bytes back.
+                                */
+                               if (kevp[i].flags & EV_EOF)
+                                       pfd->revents |= POLLHUP;
+#endif
                                if (pfd->events & POLLIN)
                                        pfd->revents |= POLLIN;
                                if (pfd->events & POLLRDNORM)
                                        pfd->revents |= POLLRDNORM;
                                break;
                        case EVFILT_WRITE:
-                               if (pfd->events & POLLOUT)
-                                       pfd->revents |= POLLOUT;
-                               if (pfd->events & POLLWRNORM)
-                                       pfd->revents |= POLLWRNORM;
+                               /*
+                                * As per the OpenGroup POLLHUP is mutually
+                                * exclusive with the writability flags.  I
+                                * consider this a bit broken but...
+                                *
+                                * In this case a disconnect is implied even
+                                * for a half-closed (write side) situation.
+                                */
+                               if (kevp[i].flags & EV_EOF) {
+                                       pfd->revents |= POLLHUP;
+                               } else {
+                                       if (pfd->events & POLLOUT)
+                                               pfd->revents |= POLLOUT;
+                                       if (pfd->events & POLLWRNORM)
+                                               pfd->revents |= POLLWRNORM;
+                               }
                                break;
                        case EVFILT_EXCEPT:
+                               /*
+                                * EV_EOF should never be tagged for this
+                                * filter.
+                                */
                                if (pfd->events & POLLPRI)
                                        pfd->revents |= POLLPRI;
                                if (pfd->events & POLLRDBAND)