kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / kern / sys_generic.c
index 8f70272..9e46223 100644 (file)
@@ -108,7 +108,7 @@ struct poll_kevent_copyin_args {
        int             error;
 };
 
-static struct lwkt_token mioctl_token = LWKT_TOKEN_MP_INITIALIZER(mioctl_token);
+static struct lwkt_token mioctl_token = LWKT_TOKEN_INITIALIZER(mioctl_token);
 
 static int     doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex,
                         struct timespec *ts, int *res);
@@ -519,11 +519,8 @@ dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, size_t *res)
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
                /* Socket layer is responsible for issuing SIGPIPE. */
-               if (error == EPIPE) {
-                       get_mplock();
+               if (error == EPIPE)
                        lwpsignal(lp->lwp_proc, lp, SIGPIPE);
-                       rel_mplock();
-               }
        }
 #ifdef KTRACE
        if (ktriov != NULL) {
@@ -841,8 +838,6 @@ sys_select(struct select_args *uap)
 
 /*
  * Pselect system call.
- *
- * MPALMOSTSAFE
  */
 int
 sys_pselect(struct pselect_args *uap)
@@ -872,12 +867,11 @@ sys_pselect(struct pselect_args *uap)
                error = copyin(uap->sigmask, &sigmask, sizeof(sigmask));
                if (error)
                        return (error);
-               get_mplock();
+               lwkt_gettoken(&lp->lwp_proc->p_token);
                lp->lwp_oldsigmask = lp->lwp_sigmask;
                SIG_CANTMASK(sigmask);
                lp->lwp_sigmask = sigmask;
-       } else {
-               get_mplock();
+               lwkt_reltoken(&lp->lwp_proc->p_token);
        }
 
        /*
@@ -887,6 +881,7 @@ sys_pselect(struct pselect_args *uap)
                         &uap->sysmsg_result);
 
        if (uap->sigmask != NULL) {
+               lwkt_gettoken(&lp->lwp_proc->p_token);
                /* doselect() responsible for turning ERESTART into EINTR */
                KKASSERT(error != ERESTART);
                if (error == EINTR) {
@@ -896,15 +891,15 @@ sys_pselect(struct pselect_args *uap)
                         * us.  So make a note to restore it after executing
                         * the handler.
                         */
-                       lp->lwp_flag |= LWP_OLDMASK;
+                       lp->lwp_flags |= LWP_OLDMASK;
                } else {
                        /*
                         * No handler to run. Restore previous mask immediately.
                         */
                        lp->lwp_sigmask = lp->lwp_oldsigmask;
                }
+               lwkt_reltoken(&lp->lwp_proc->p_token);
        }
-       rel_mplock();
 
        return (error);
 }
@@ -1026,16 +1021,18 @@ select_copyout(void *arg, struct kevent *kevp, int count, int *res)
                 * Handle errors
                 */
                if (kevp[i].flags & EV_ERROR) {
-                       switch(kevp[i].data) {
+                       int error = kevp[i].data;
+
+                       switch (error) {
                        case EBADF:
                                /*
                                 * A bad file descriptor is considered a
                                 * fatal error for select, bail out.
                                 */
-                               skap->error = EBADF;
-                               *res = 0;
-                               return (1);
-                               break;
+                               skap->error = error;
+                               *res = -1;
+                               return error;
+
                        default:
                                /*
                                 * Select silently swallows any unknown errors
@@ -1047,18 +1044,17 @@ select_copyout(void *arg, struct kevent *kevp, int count, int *res)
                                 */
                                if (kevp[i].filter != EVFILT_READ &&
                                    kevp[i].filter != EVFILT_WRITE &&
-                                   kevp[i].data != EOPNOTSUPP) {
-                                       skap->error = kevp[i].data;
-                                       *res = 0;
-                                       return (1);
+                                   error != EOPNOTSUPP) {
+                                       skap->error = error;
+                                       *res = -1;
+                                       return error;
                                }
                                break;
                        }
                        if (nseldebug)
-                               kprintf("select fd %ju filter %d error %jd\n",
+                               kprintf("select fd %ju filter %d error %d\n",
                                        (uintmax_t)kevp[i].ident,
-                                       kevp[i].filter,
-                                       (intmax_t)kevp[i].data);
+                                       kevp[i].filter, error);
                        continue;
                }
 
@@ -1118,6 +1114,12 @@ putbits(int bytes, kfd_set *in_set, fd_set *out_set)
        return (error);
 }
 
+static int  /* returns the result of nanosleep1() unchanged */
+dotimeout_only(struct timespec *ts)  /* timeout-only path used by doselect()/dopoll() when given zero descriptors */
+{
+       return(nanosleep1(ts, NULL));  /* NOTE(review): ts is forwarded without a NULL check -- confirm a NULL ("no timeout") ts cannot reach here */
+}
+
 /*
  * Common code for sys_select() and sys_pselect().
  *
@@ -1139,6 +1141,9 @@ doselect(int nd, fd_set *read, fd_set *write, fd_set *except,
        *res = 0;
        if (nd < 0)
                return (EINVAL);
+       if (nd == 0)
+               return (dotimeout_only(ts));
+
        if (nd > p->p_fd->fd_nfiles)            /* limit kmalloc */
                nd = p->p_fd->fd_nfiles;
 
@@ -1379,14 +1384,17 @@ poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
                        case EVFILT_READ:
 #if 0
                                /*
-                                * EOF on the read side can indicate a
+                                * NODATA on the read side can indicate a
                                 * half-closed situation and not necessarily
                                 * a disconnect, so depend on the user
                                 * issuing a read() and getting 0 bytes back.
                                 */
-                               if (kevp[i].flags & EV_EOF)
+                               if (kevp[i].flags & EV_NODATA)
                                        pfd->revents |= POLLHUP;
 #endif
+                               if ((kevp[i].flags & EV_EOF) &&
+                                   kevp[i].fflags != 0)
+                                       pfd->revents |= POLLERR;
                                if (pfd->events & POLLIN)
                                        pfd->revents |= POLLIN;
                                if (pfd->events & POLLRDNORM)
@@ -1403,6 +1411,8 @@ poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
                                 */
                                if (kevp[i].flags & EV_EOF) {
                                        pfd->revents |= POLLHUP;
+                                       if (kevp[i].fflags != 0)
+                                               pfd->revents |= POLLERR;
                                } else {
                                        if (pfd->events & POLLOUT)
                                                pfd->revents |= POLLOUT;
@@ -1412,7 +1422,7 @@ poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
                                break;
                        case EVFILT_EXCEPT:
                                /*
-                                * EV_EOF should never be tagged for this
+                                * EV_NODATA should never be tagged for this
                                 * filter.
                                 */
                                if (pfd->events & POLLPRI)
@@ -1452,6 +1462,9 @@ dopoll(int nfds, struct pollfd *fds, struct timespec *ts, int *res)
         if (nfds < 0)
                 return (EINVAL);
 
+       if (nfds == 0)
+               return (dotimeout_only(ts));
+
        /*
         * This is a bit arbitrary but we need to limit internal kmallocs.
         */