sys/kern/kern_time.c

   1 /*
   2  * Copyright (c) 1982, 1986, 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  *
  33  *      @(#)kern_time.c 8.1 (Berkeley) 6/10/93
  34  * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
  35  * $DragonFly: src/sys/kern/kern_time.c,v 1.24 2005/04/22 17:41:15 joerg Exp $
  36  */
  37
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/buf.h>
  41 #include <sys/sysproto.h>
  42 #include <sys/resourcevar.h>
  43 #include <sys/signalvar.h>
  44 #include <sys/kernel.h>
  45 #include <sys/systm.h>
  46 #include <sys/sysent.h>
  47 #include <sys/sysunion.h>
  48 #include <sys/proc.h>
  49 #include <sys/time.h>
  50 #include <sys/vnode.h>
  51 #include <sys/sysctl.h>
  52 #include <vm/vm.h>
  53 #include <vm/vm_extern.h>
  54 #include <sys/msgport2.h>
  55 #include <sys/thread2.h>
  56
/*
 * System-wide timezone record.  gettimeofday() below copies it out to the
 * caller and settimeofday() below replaces it.
 */
struct timezone tz;
  58
  59 /*
  60  * Time of day and interval timer support.
  61  *
  62  * These routines provide the kernel entry points to get and set
  63  * the time-of-day and per-process interval timers.  Subroutines
  64  * here provide support for adding and subtracting timeval structures
  65  * and decrementing interval timers, optionally reloading the interval
  66  * timers when they expire.
  67  */
  68
/* Forward declarations of the file-local helpers defined further down. */
static int      nanosleep1 (struct timespec *rqt,
                    struct timespec *rmt);
static int      settime (struct timeval *);
static void     timevalfix (struct timeval *);
static void     no_lease_updatetime (int);

/*
 * Threshold, in microseconds, consulted by nanosleep1(): a remaining
 * sub-tick delay shorter than this is handled with uio_yield() instead of
 * a one-shot systimer.  Registered read/write under the _kern sysctl node.
 */
static int     sleep_hard_us = 100;
SYSCTL_INT(_kern, OID_AUTO, sleep_hard_us, CTLFLAG_RW, &sleep_hard_us, 0, "")
  77
  78 static void
  79 no_lease_updatetime(deltat)
  80         int deltat;
  81 {
  82 }
  83
/*
 * Hook invoked by settime() with the whole-second part of the clock step;
 * initialized to the no-op no_lease_updatetime().
 */
void (*lease_updatetime) (int)  = no_lease_updatetime;
  85
  86 static int
  87 settime(tv)
  88         struct timeval *tv;
  89 {
  90         struct timeval delta, tv1, tv2;
  91         static struct timeval maxtime, laststep;
  92         struct timespec ts;
  93         int origcpu;
  94
  95         if ((origcpu = mycpu->gd_cpuid) != 0)
  96                 lwkt_setcpu_self(globaldata_find(0));
  97
  98         crit_enter();
  99         microtime(&tv1);
 100         delta = *tv;
 101         timevalsub(&delta, &tv1);
 102
 103         /*
 104          * If the system is secure, we do not allow the time to be
 105          * set to a value earlier than 1 second less than the highest
 106          * time we have yet seen. The worst a miscreant can do in
 107          * this circumstance is "freeze" time. He couldn't go
 108          * back to the past.
 109          *
 110          * We similarly do not allow the clock to be stepped more
 111          * than one second, nor more than once per second. This allows
 112          * a miscreant to make the clock march double-time, but no worse.
 113          */
 114         if (securelevel > 1) {
 115                 if (delta.tv_sec < 0 || delta.tv_usec < 0) {
 116                         /*
 117                          * Update maxtime to latest time we've seen.
 118                          */
 119                         if (tv1.tv_sec > maxtime.tv_sec)
 120                                 maxtime = tv1;
 121                         tv2 = *tv;
 122                         timevalsub(&tv2, &maxtime);
 123                         if (tv2.tv_sec < -1) {
 124                                 tv->tv_sec = maxtime.tv_sec - 1;
 125                                 printf("Time adjustment clamped to -1 second\n");
 126                         }
 127                 } else {
 128                         if (tv1.tv_sec == laststep.tv_sec) {
 129                                 crit_exit();
 130                                 return (EPERM);
 131                         }
 132                         if (delta.tv_sec > 1) {
 133                                 tv->tv_sec = tv1.tv_sec + 1;
 134                                 printf("Time adjustment clamped to +1 second\n");
 135                         }
 136                         laststep = *tv;
 137                 }
 138         }
 139
 140         ts.tv_sec = tv->tv_sec;
 141         ts.tv_nsec = tv->tv_usec * 1000;
 142         set_timeofday(&ts);
 143         lease_updatetime(delta.tv_sec);
 144         crit_exit();
 145
 146         if (origcpu != 0)
 147                 lwkt_setcpu_self(globaldata_find(origcpu));
 148
 149         resettodr();
 150         return (0);
 151 }
 152
 153 /* ARGSUSED */
 154 int
 155 clock_gettime(struct clock_gettime_args *uap)
 156 {
 157         struct timespec ats;
 158
 159         switch(uap->clock_id) {
 160         case CLOCK_REALTIME:
 161                 nanotime(&ats);
 162                 return (copyout(&ats, uap->tp, sizeof(ats)));
 163         case CLOCK_MONOTONIC:
 164                 nanouptime(&ats);
 165                 return (copyout(&ats, uap->tp, sizeof(ats)));
 166         default:
 167                 return (EINVAL);
 168         }
 169 }
 170
 171 /* ARGSUSED */
 172 int
 173 clock_settime(struct clock_settime_args *uap)
 174 {
 175         struct thread *td = curthread;
 176         struct timeval atv;
 177         struct timespec ats;
 178         int error;
 179
 180         if ((error = suser(td)) != 0)
 181                 return (error);
 182         switch(uap->clock_id) {
 183         case CLOCK_REALTIME:
 184                 if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
 185                         return (error);
 186                 if (ats.tv_nsec < 0 || ats.tv_nsec >= 1000000000)
 187                         return (EINVAL);
 188                 /* XXX Don't convert nsec->usec and back */
 189                 TIMESPEC_TO_TIMEVAL(&atv, &ats);
 190                 error = settime(&atv);
 191                 return (error);
 192         default:
 193                 return (EINVAL);
 194         }
 195 }
 196
 197 int
 198 clock_getres(struct clock_getres_args *uap)
 199 {
 200         struct timespec ts;
 201
 202         switch(uap->clock_id) {
 203         case CLOCK_REALTIME:
 204         case CLOCK_MONOTONIC:
 205                 /*
 206                  * Round up the result of the division cheaply
 207                  * by adding 1.  Rounding up is especially important
 208                  * if rounding down would give 0.  Perfect rounding
 209                  * is unimportant.
 210                  */
 211                 ts.tv_sec = 0;
 212                 ts.tv_nsec = 1000000000 / cputimer_freq + 1;
 213                 return(copyout(&ts, uap->tp, sizeof(ts)));
 214         default:
 215                 return(EINVAL);
 216         }
 217 }
 218
/*
 * nanosleep1()
 *
 *      This is a general helper function for nanosleep() (aka sleep() aka
 *      usleep()).
 *
 *      If there is less than one tick's worth of time left and
 *      we haven't done a yield, or the remaining microseconds is
 *      ridiculously low, do a yield.  This avoids having
 *      to deal with systimer overheads when the system is under
 *      heavy loads.  If we have done a yield already then use
 *      a systimer and an uninterruptible thread wait.
 *
 *      If there is more than a tick's worth of time left,
 *      calculate the baseline ticks and use an interruptible
 *      tsleep, then handle the fine-grained delay on the next
 *      loop.  This usually results in two sleeps occurring, a long one
 *      and a short one.
 */
 238 static void
 239 ns1_systimer(systimer_t info)
 240 {
 241         lwkt_schedule(info->data);
 242 }
 243
 244 static int
 245 nanosleep1(struct timespec *rqt, struct timespec *rmt)
 246 {
 247         static int nanowait;
 248         struct timespec ts, ts2, ts3;
 249         struct timeval tv;
 250         int error;
 251         int tried_yield;
 252
 253         if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
 254                 return (EINVAL);
 255         if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
 256                 return (0);
 257         nanouptime(&ts);
 258         timespecadd(&ts, rqt);          /* ts = target timestamp compare */
 259         TIMESPEC_TO_TIMEVAL(&tv, rqt);  /* tv = sleep interval */
 260         tried_yield = 0;
 261
 262         for (;;) {
 263                 int ticks;
 264                 struct systimer info;
 265
 266                 ticks = tv.tv_usec / tick;      /* approximate */
 267
 268                 if (tv.tv_sec == 0 && ticks == 0) {
 269                         thread_t td = curthread;
 270                         if (tried_yield || tv.tv_usec < sleep_hard_us) {
 271                                 tried_yield = 0;
 272                                 uio_yield();
 273                         } else {
 274                                 crit_enter_quick(td);
 275                                 systimer_init_oneshot(&info, ns1_systimer,
 276                                                 td, tv.tv_usec);
 277                                 lwkt_deschedule_self(td);
 278                                 crit_exit_quick(td);
 279                                 lwkt_switch();
 280                                 systimer_del(&info); /* make sure it's gone */
 281                         }
 282                         error = iscaught(td->td_proc);
 283                 } else if (tv.tv_sec == 0) {
 284                         error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
 285                 } else {
 286                         ticks = tvtohz_low(&tv); /* also handles overflow */
 287                         error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
 288                 }
 289                 nanouptime(&ts2);
 290                 if (error && error != EWOULDBLOCK) {
 291                         if (error == ERESTART)
 292                                 error = EINTR;
 293                         if (rmt != NULL) {
 294                                 timespecsub(&ts, &ts2);
 295                                 if (ts.tv_sec < 0)
 296                                         timespecclear(&ts);
 297                                 *rmt = ts;
 298                         }
 299                         return (error);
 300                 }
 301                 if (timespeccmp(&ts2, &ts, >=))
 302                         return (0);
 303                 ts3 = ts;
 304                 timespecsub(&ts3, &ts2);
 305                 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 306         }
 307 }
 308
/*
 * Helpers for the asynchronous nanosleep() path: the callout handler that
 * replies to the message, and the copyout hook installed in the sysmsg.
 */
static void nanosleep_done(void *arg);
static void nanosleep_copyout(union sysunion *sysun);
 311
 312 /* ARGSUSED */
 313 int
 314 nanosleep(struct nanosleep_args *uap)
 315 {
 316         int error;
 317         struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
 318
 319         error = copyin(uap->rqtp, &smsleep->rqt, sizeof(smsleep->rqt));
 320         if (error)
 321                 return (error);
 322         /*
 323          * YYY clean this up to always use the callout, note that an abort
 324          * implementation should record the residual in the async case.
 325          */
 326         if (uap->sysmsg.lmsg.ms_flags & MSGF_ASYNC) {
 327                 quad_t ticks;
 328
 329                 ticks = (quad_t)smsleep->rqt.tv_nsec * hz / 1000000000LL;
 330                 if (smsleep->rqt.tv_sec)
 331                         ticks += (quad_t)smsleep->rqt.tv_sec * hz;
 332                 if (ticks <= 0) {
 333                         if (ticks == 0)
 334                                 error = 0;
 335                         else
 336                                 error = EINVAL;
 337                 } else {
 338                         uap->sysmsg.copyout = nanosleep_copyout;
 339                         uap->sysmsg.lmsg.ms_flags &= ~MSGF_DONE;
 340                         callout_init(&smsleep->timer);
 341                         callout_reset(&smsleep->timer, ticks, nanosleep_done, uap);
 342                         error = EASYNC;
 343                 }
 344         } else {
 345                 /*
 346                  * Old synchronous sleep code, copyout the residual if
 347                  * nanosleep was interrupted.
 348                  */
 349                 error = nanosleep1(&smsleep->rqt, &smsleep->rmt);
 350                 if (error && uap->rmtp)
 351                         error = copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
 352         }
 353         return (error);
 354 }
 355
 356 /*
 357  * Asynch completion for the nanosleep() syscall.  This function may be
 358  * called from any context and cannot legally access the originating
 359  * thread, proc, or its user space.
 360  *
 361  * YYY change the callout interface API so we can simply assign the replymsg
 362  * function to it directly.
 363  */
 364 static void
 365 nanosleep_done(void *arg)
 366 {
 367         struct nanosleep_args *uap = arg;
 368         lwkt_msg_t msg = &uap->sysmsg.lmsg;
 369
 370         lwkt_replymsg(msg, 0);
 371 }
 372
 373 /*
 374  * Asynch return for the nanosleep() syscall, called in the context of the
 375  * originating thread when it pulls the message off the reply port.  This
 376  * function is responsible for any copyouts to userland.  Kernel threads
 377  * which do their own internal system calls will not usually call the return
 378  * function.
 379  */
 380 static void
 381 nanosleep_copyout(union sysunion *sysun)
 382 {
 383         struct nanosleep_args *uap = &sysun->nanosleep;
 384         struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
 385
 386         if (sysun->lmsg.ms_error && uap->rmtp) {
 387                 sysun->lmsg.ms_error =
 388                     copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
 389         }
 390 }
 391
 392 /* ARGSUSED */
 393 int
 394 gettimeofday(struct gettimeofday_args *uap)
 395 {
 396         struct timeval atv;
 397         int error = 0;
 398
 399         if (uap->tp) {
 400                 microtime(&atv);
 401                 if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
 402                     sizeof (atv))))
 403                         return (error);
 404         }
 405         if (uap->tzp)
 406                 error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
 407                     sizeof (tz));
 408         return (error);
 409 }
 410
 411 /* ARGSUSED */
 412 int
 413 settimeofday(struct settimeofday_args *uap)
 414 {
 415         struct thread *td = curthread;
 416         struct timeval atv;
 417         struct timezone atz;
 418         int error;
 419
 420         if ((error = suser(td)))
 421                 return (error);
 422         /* Verify all parameters before changing time. */
 423         if (uap->tv) {
 424                 if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
 425                     sizeof(atv))))
 426                         return (error);
 427                 if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
 428                         return (EINVAL);
 429         }
 430         if (uap->tzp &&
 431             (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
 432                 return (error);
 433         if (uap->tv && (error = settime(&atv)))
 434                 return (error);
 435         if (uap->tzp)
 436                 tz = atz;
 437         return (0);
 438 }
 439
 440 static void
 441 kern_adjtime_common(void)
 442 {
 443         if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
 444             (ntp_delta < 0 && ntp_delta > ntp_default_tick_delta))
 445                 ntp_tick_delta = ntp_delta;
 446         else if (ntp_delta > ntp_big_delta)
 447                 ntp_tick_delta = 10 * ntp_default_tick_delta;
 448         else if (ntp_delta < -ntp_big_delta)
 449                 ntp_tick_delta = -10 * ntp_default_tick_delta;
 450         else if (ntp_delta > 0)
 451                 ntp_tick_delta = ntp_default_tick_delta;
 452         else
 453                 ntp_tick_delta = -ntp_default_tick_delta;
 454 }
 455
 456 void
 457 kern_adjtime(int64_t delta, int64_t *odelta)
 458 {
 459         int origcpu;
 460
 461         if ((origcpu = mycpu->gd_cpuid) != 0)
 462                 lwkt_setcpu_self(globaldata_find(0));
 463
 464         crit_enter();
 465         *odelta = ntp_delta;
 466         ntp_delta += delta;
 467         kern_adjtime_common();
 468         crit_exit();
 469
 470         if (origcpu != 0)
 471                 lwkt_setcpu_self(globaldata_find(origcpu));
 472 }
 473
 474 void
 475 kern_reladjtime(int64_t delta)
 476 {
 477         int origcpu;
 478
 479         if ((origcpu = mycpu->gd_cpuid) != 0)
 480                 lwkt_setcpu_self(globaldata_find(0));
 481
 482         crit_enter();
 483         ntp_delta += delta;
 484         kern_adjtime_common();
 485         crit_exit();
 486
 487         if (origcpu != 0)
 488                 lwkt_setcpu_self(globaldata_find(origcpu));
 489 }
 490
 491 static void
 492 kern_adjfreq(int64_t rate)
 493 {
 494         int origcpu;
 495
 496         if ((origcpu = mycpu->gd_cpuid) != 0)
 497                 lwkt_setcpu_self(globaldata_find(0));
 498
 499         crit_enter();
 500         ntp_tick_permanent = rate;
 501         crit_exit();
 502
 503         if (origcpu != 0)
 504                 lwkt_setcpu_self(globaldata_find(origcpu));
 505 }
 506
 507 /* ARGSUSED */
 508 int
 509 adjtime(struct adjtime_args *uap)
 510 {
 511         struct thread *td = curthread;
 512         struct timeval atv;
 513         int64_t ndelta, odelta;
 514         int error;
 515
 516         if ((error = suser(td)))
 517                 return (error);
 518         if ((error =
 519             copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval))))
 520                 return (error);
 521
 522         /*
 523          * Compute the total correction and the rate at which to apply it.
 524          * Round the adjustment down to a whole multiple of the per-tick
 525          * delta, so that after some number of incremental changes in
 526          * hardclock(), tickdelta will become zero, lest the correction
 527          * overshoot and start taking us away from the desired final time.
 528          */
 529         ndelta = atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
 530         kern_adjtime(ndelta, &odelta);
 531
 532         if (uap->olddelta) {
 533                 atv.tv_sec = odelta / 1000000000;
 534                 atv.tv_usec = odelta % 1000000 / 1000;
 535                 (void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
 536                     sizeof(struct timeval));
 537         }
 538         return (0);
 539 }
 540
 541 static int
 542 sysctl_adjtime(SYSCTL_HANDLER_ARGS)
 543 {
 544         int64_t delta;
 545         int error;
 546
 547         if (req->oldptr != NULL) {
 548                 delta = 0;
 549                 error = SYSCTL_OUT(req, &delta, sizeof(delta));
 550                 if (error)
 551                         return (error);
 552         }
 553         if (req->newptr != NULL) {
 554                 if (suser(curthread))
 555                         return (EPERM);
 556                 error = SYSCTL_IN(req, &delta, sizeof(delta));
 557                 if (error)
 558                         return (error);
 559                 kern_reladjtime(delta);
 560         }
 561         return (0);
 562 }
 563
 564 static int
 565 sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
 566 {
 567         int64_t freqdelta;
 568         int error;
 569
 570         if (req->oldptr != NULL) {
 571                 freqdelta = ntp_tick_permanent * hz;
 572                 error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
 573                 if (error)
 574                         return (error);
 575         }
 576         if (req->newptr != NULL) {
 577                 if (suser(curthread))
 578                         return (EPERM);
 579                 error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
 580                 if (error)
 581                         return (error);
 582
 583                 freqdelta /= hz;
 584                 kern_adjfreq(freqdelta);
 585         }
 586         return (0);
 587 }
 588
/*
 * Sysctl entries for the NTP controls, all placed under a new "ntp" node
 * beneath _kern.  "permanent" and "adjust" are served by the handlers
 * above; the remaining entries expose the ntp_* variables directly.
 */
SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
/* Read/write, handled by sysctl_adjfreq(). */
SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
    CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0,
    sysctl_adjfreq, "LU", "permanent correction per second");
/* Read-only views of the adjustment state. */
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, delta, CTLFLAG_RD,
    &ntp_delta, sizeof(ntp_delta), "LU",
    "one-time delta");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
    &ntp_big_delta, sizeof(ntp_big_delta), "LU",
    "threshold for fast adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
    &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
    "per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
    &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
    "default per-tick adjustment");
/* Read/write leap-second controls. */
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
    &ntp_leap_second, sizeof(ntp_leap_second), "LU",
    "next leap second");
SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
    &ntp_leap_insert, 0, "insert or remove leap second");
/* Read/write, handled by sysctl_adjtime(). */
SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
    CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0,
    sysctl_adjtime, "", "relative adjust for delta");
 613
 614 /*
 615  * Get value of an interval timer.  The process virtual and
 616  * profiling virtual time timers are kept in the p_stats area, since
 617  * they can be swapped out.  These are kept internally in the
 618  * way they are specified externally: in time until they expire.
 619  *
 620  * The real time interval timer is kept in the process table slot
 621  * for the process, and its value (it_value) is kept as an
 622  * absolute time rather than as a delta, so that it is easy to keep
 623  * periodic real-time signals from drifting.
 624  *
 625  * Virtual time timers are processed in the hardclock() routine of
 626  * kern_clock.c.  The real time timer is processed by a timeout
 627  * routine, called from the softclock() routine.  Since a callout
 628  * may be delayed in real time due to interrupt processing in the system,
 629  * it is possible for the real time timeout routine (realitexpire, given below),
 630  * to be delayed in real time past when it is supposed to occur.  It
 631  * does not suffice, therefore, to reload the real timer .it_value from the
 632  * real time timers .it_interval.  Rather, we compute the next time in
 633  * absolute time the timer should go off.
 634  */
 635 /* ARGSUSED */
 636 int
 637 getitimer(struct getitimer_args *uap)
 638 {
 639         struct proc *p = curproc;
 640         struct timeval ctv;
 641         struct itimerval aitv;
 642
 643         if (uap->which > ITIMER_PROF)
 644                 return (EINVAL);
 645         crit_enter();
 646         if (uap->which == ITIMER_REAL) {
 647                 /*
 648                  * Convert from absolute to relative time in .it_value
 649                  * part of real time timer.  If time for real time timer
 650                  * has passed return 0, else return difference between
 651                  * current time and time for the timer to go off.
 652                  */
 653                 aitv = p->p_realtimer;
 654                 if (timevalisset(&aitv.it_value)) {
 655                         getmicrouptime(&ctv);
 656                         if (timevalcmp(&aitv.it_value, &ctv, <))
 657                                 timevalclear(&aitv.it_value);
 658                         else
 659                                 timevalsub(&aitv.it_value, &ctv);
 660                 }
 661         } else {
 662                 aitv = p->p_stats->p_timer[uap->which];
 663         }
 664         crit_exit();
 665         return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
 666             sizeof (struct itimerval)));
 667 }
 668
 669 /* ARGSUSED */
 670 int
 671 setitimer(struct setitimer_args *uap)
 672 {
 673         struct itimerval aitv;
 674         struct timeval ctv;
 675         struct itimerval *itvp;
 676         struct proc *p = curproc;
 677         int error;
 678
 679         if (uap->which > ITIMER_PROF)
 680                 return (EINVAL);
 681         itvp = uap->itv;
 682         if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
 683             sizeof(struct itimerval))))
 684                 return (error);
 685         if ((uap->itv = uap->oitv) &&
 686             (error = getitimer((struct getitimer_args *)uap)))
 687                 return (error);
 688         if (itvp == 0)
 689                 return (0);
 690         if (itimerfix(&aitv.it_value))
 691                 return (EINVAL);
 692         if (!timevalisset(&aitv.it_value))
 693                 timevalclear(&aitv.it_interval);
 694         else if (itimerfix(&aitv.it_interval))
 695                 return (EINVAL);
 696         crit_enter();
 697         if (uap->which == ITIMER_REAL) {
 698                 if (timevalisset(&p->p_realtimer.it_value))
 699                         callout_stop(&p->p_ithandle);
 700                 if (timevalisset(&aitv.it_value))
 701                         callout_reset(&p->p_ithandle,
 702                             tvtohz_high(&aitv.it_value), realitexpire, p);
 703                 getmicrouptime(&ctv);
 704                 timevaladd(&aitv.it_value, &ctv);
 705                 p->p_realtimer = aitv;
 706         } else {
 707                 p->p_stats->p_timer[uap->which] = aitv;
 708         }
 709         crit_exit();
 710         return (0);
 711 }
 712
 713 /*
 714  * Real interval timer expired:
 715  * send process whose timer expired an alarm signal.
 716  * If time is not set up to reload, then just return.
 717  * Else compute next time timer should go off which is > current time.
 718  * This is where delay in processing this timeout causes multiple
 719  * SIGALRM calls to be compressed into one.
 720  * tvtohz_high() always adds 1 to allow for the time until the next clock
 721  * interrupt being strictly less than 1 clock tick, but we don't want
 722  * that here since we want to appear to be in sync with the clock
 723  * interrupt even when we're delayed.
 724  */
 725 void
 726 realitexpire(arg)
 727         void *arg;
 728 {
 729         struct proc *p;
 730         struct timeval ctv, ntv;
 731
 732         p = (struct proc *)arg;
 733         psignal(p, SIGALRM);
 734         if (!timevalisset(&p->p_realtimer.it_interval)) {
 735                 timevalclear(&p->p_realtimer.it_value);
 736                 return;
 737         }
 738         for (;;) {
 739                 crit_enter();
 740                 timevaladd(&p->p_realtimer.it_value,
 741                     &p->p_realtimer.it_interval);
 742                 getmicrouptime(&ctv);
 743                 if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
 744                         ntv = p->p_realtimer.it_value;
 745                         timevalsub(&ntv, &ctv);
 746                         callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
 747                                       realitexpire, p);
 748                         crit_exit();
 749                         return;
 750                 }
 751                 crit_exit();
 752         }
 753 }
 754
 755 /*
 756  * Check that a proposed value to load into the .it_value or
 757  * .it_interval part of an interval timer is acceptable, and
 758  * fix it to have at least minimal value (i.e. if it is less
 759  * than the resolution of the clock, round it up.)
 760  */
 761 int
 762 itimerfix(tv)
 763         struct timeval *tv;
 764 {
 765
 766         if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
 767             tv->tv_usec < 0 || tv->tv_usec >= 1000000)
 768                 return (EINVAL);
 769         if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
 770                 tv->tv_usec = tick;
 771         return (0);
 772 }
 773
 774 /*
 775  * Decrement an interval timer by a specified number
 776  * of microseconds, which must be less than a second,
 777  * i.e. < 1000000.  If the timer expires, then reload
 778  * it.  In this case, carry over (usec - old value) to
 779  * reduce the value reloaded into the timer so that
 780  * the timer does not drift.  This routine assumes
 781  * that it is called in a context where the timers
 782  * on which it is operating cannot change in value.
 783  */
 784 int
 785 itimerdecr(itp, usec)
 786         struct itimerval *itp;
 787         int usec;
 788 {
 789
 790         if (itp->it_value.tv_usec < usec) {
 791                 if (itp->it_value.tv_sec == 0) {
 792                         /* expired, and already in next interval */
 793                         usec -= itp->it_value.tv_usec;
 794                         goto expire;
 795                 }
 796                 itp->it_value.tv_usec += 1000000;
 797                 itp->it_value.tv_sec--;
 798         }
 799         itp->it_value.tv_usec -= usec;
 800         usec = 0;
 801         if (timevalisset(&itp->it_value))
 802                 return (1);
 803         /* expired, exactly at end of interval */
 804 expire:
 805         if (timevalisset(&itp->it_interval)) {
 806                 itp->it_value = itp->it_interval;
 807                 itp->it_value.tv_usec -= usec;
 808                 if (itp->it_value.tv_usec < 0) {
 809                         itp->it_value.tv_usec += 1000000;
 810                         itp->it_value.tv_sec--;
 811                 }
 812         } else
 813                 itp->it_value.tv_usec = 0;              /* sec is already 0 */
 814         return (0);
 815 }
 816
 817 /*
 818  * Add and subtract routines for timevals.
 819  * N.B.: subtract routine doesn't deal with
 820  * results which are before the beginning,
 821  * it just gets very confused in this case.
 822  * Caveat emptor.
 823  */
 824 void
 825 timevaladd(t1, t2)
 826         struct timeval *t1, *t2;
 827 {
 828
 829         t1->tv_sec += t2->tv_sec;
 830         t1->tv_usec += t2->tv_usec;
 831         timevalfix(t1);
 832 }
 833
 834 void
 835 timevalsub(t1, t2)
 836         struct timeval *t1, *t2;
 837 {
 838
 839         t1->tv_sec -= t2->tv_sec;
 840         t1->tv_usec -= t2->tv_usec;
 841         timevalfix(t1);
 842 }
 843
 844 static void
 845 timevalfix(t1)
 846         struct timeval *t1;
 847 {
 848
 849         if (t1->tv_usec < 0) {
 850                 t1->tv_sec--;
 851                 t1->tv_usec += 1000000;
 852         }
 853         if (t1->tv_usec >= 1000000) {
 854                 t1->tv_sec++;
 855                 t1->tv_usec -= 1000000;
 856         }
 857 }
 858
 859 /*
 860  * ratecheck(): simple time-based rate-limit checking.
 861  */
 862 int
 863 ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
 864 {
 865         struct timeval tv, delta;
 866         int rv = 0;
 867
 868         getmicrouptime(&tv);            /* NB: 10ms precision */
 869         delta = tv;
 870         timevalsub(&delta, lasttime);
 871
 872         /*
 873          * check for 0,0 is so that the message will be seen at least once,
 874          * even if interval is huge.
 875          */
 876         if (timevalcmp(&delta, mininterval, >=) ||
 877             (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
 878                 *lasttime = tv;
 879                 rv = 1;
 880         }
 881
 882         return (rv);
 883 }
 884
 885 /*
 886  * ppsratecheck(): packets (or events) per second limitation.
 887  *
 888  * Return 0 if the limit is to be enforced (e.g. the caller
 889  * should drop a packet because of the rate limitation).
 890  *
 891  * maxpps of 0 always causes zero to be returned.  maxpps of -1
 892  * always causes 1 to be returned; this effectively defeats rate
 893  * limiting.
 894  *
 895  * Note that we maintain the struct timeval for compatibility
 896  * with other bsd systems.  We reuse the storage and just monitor
 897  * clock ticks for minimal overhead.
 898  */
 899 int
 900 ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
 901 {
 902         int now;
 903
 904         /*
 905          * Reset the last time and counter if this is the first call
 906          * or more than a second has passed since the last update of
 907          * lasttime.
 908          */
 909         now = ticks;
 910         if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
 911                 lasttime->tv_sec = now;
 912                 *curpps = 1;
 913                 return (maxpps != 0);
 914         } else {
 915                 (*curpps)++;            /* NB: ignore potential overflow */
 916                 return (maxpps < 0 || *curpps < maxpps);
 917         }
 918 }
 919