kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / kern / kern_time.c
index 617eb13..3058bf0 100644 (file)
@@ -68,12 +68,24 @@ struct timezone tz;
  * timers when they expire.
  */
 
-int    nanosleep1(struct timespec *rqt, struct timespec *rmt);
 static int     settime(struct timeval *);
 static void    timevalfix(struct timeval *);
 
-static int     sleep_hard_us = 100;
-SYSCTL_INT(_kern, OID_AUTO, sleep_hard_us, CTLFLAG_RW, &sleep_hard_us, 0, "")
+/*
+ * Nanosleep tries very hard to sleep for a precisely requested time
+ * interval, down to 1uS.  The administrator can impose a minimum delay
+ * and a delay below which we hard-loop instead of initiating a timer
+ * interrupt and sleep.
+ *
+ * For machines under high loads it might be beneficial to increase min_us
+ * to e.g. 1000uS (1ms) so spinning processes sleep meaningfully.
+ */
+static int     nanosleep_min_us = 10;
+static int     nanosleep_hard_us = 100;
+SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
+          &nanosleep_min_us, 0, "")
+SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
+          &nanosleep_hard_us, 0, "")
 
 static int
 settime(struct timeval *tv)
@@ -313,8 +325,11 @@ nanosleep1(struct timespec *rqt, struct timespec *rmt)
 
                if (tv.tv_sec == 0 && ticks == 0) {
                        thread_t td = curthread;
-                       if (tv.tv_usec < sleep_hard_us) {
+                       if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
+                               tv.tv_usec = nanosleep_min_us;
+                       if (tv.tv_usec < nanosleep_hard_us) {
                                lwkt_user_yield();
+                               cpu_pause();
                        } else {
                                crit_enter_quick(td);
                                systimer_init_oneshot(&info, ns1_systimer,
@@ -689,8 +704,7 @@ sys_getitimer(struct getitimer_args *uap)
 
        if (uap->which > ITIMER_PROF)
                return (EINVAL);
-       get_mplock();
-       crit_enter();
+       lwkt_gettoken(&p->p_token);
        if (uap->which == ITIMER_REAL) {
                /*
                 * Convert from absolute to relative time in .it_value
@@ -709,8 +723,7 @@ sys_getitimer(struct getitimer_args *uap)
        } else {
                aitv = p->p_timer[uap->which];
        }
-       crit_exit();
-       rel_mplock();
+       lwkt_reltoken(&p->p_token);
        return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
 }
 
@@ -743,8 +756,7 @@ sys_setitimer(struct setitimer_args *uap)
                timevalclear(&aitv.it_interval);
        else if (itimerfix(&aitv.it_interval))
                return (EINVAL);
-       get_mplock();
-       crit_enter();
+       lwkt_gettoken(&p->p_token);
        if (uap->which == ITIMER_REAL) {
                if (timevalisset(&p->p_realtimer.it_value))
                        callout_stop(&p->p_ithandle);
@@ -756,9 +768,16 @@ sys_setitimer(struct setitimer_args *uap)
                p->p_realtimer = aitv;
        } else {
                p->p_timer[uap->which] = aitv;
+               switch(uap->which) {
+               case ITIMER_VIRTUAL:
+                       p->p_flags &= ~P_SIGVTALRM;
+                       break;
+               case ITIMER_PROF:
+                       p->p_flags &= ~P_SIGPROF;
+                       break;
+               }
        }
-       crit_exit();
-       rel_mplock();
+       lwkt_reltoken(&p->p_token);
        return (0);
 }
 
@@ -781,26 +800,27 @@ realitexpire(void *arg)
        struct timeval ctv, ntv;
 
        p = (struct proc *)arg;
+       lwkt_gettoken(&p->p_token);
        ksignal(p, SIGALRM);
        if (!timevalisset(&p->p_realtimer.it_interval)) {
                timevalclear(&p->p_realtimer.it_value);
+               lwkt_reltoken(&p->p_token);
                return;
        }
        for (;;) {
-               crit_enter();
                timevaladd(&p->p_realtimer.it_value,
-                   &p->p_realtimer.it_interval);
+                          &p->p_realtimer.it_interval);
                getmicrouptime(&ctv);
                if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
                        ntv = p->p_realtimer.it_value;
                        timevalsub(&ntv, &ctv);
                        callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
                                      realitexpire, p);
-                       crit_exit();
+                       lwkt_reltoken(&p->p_token);
                        return;
                }
-               crit_exit();
        }
+       lwkt_reltoken(&p->p_token);
 }
 
 /*