kernel - Add additional clock_gettime() modes
[dragonfly.git] / sys / kern / kern_time.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)kern_time.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/buf.h>
40#include <sys/sysproto.h>
41#include <sys/resourcevar.h>
42#include <sys/signalvar.h>
43#include <sys/kernel.h>
984263bc 44#include <sys/sysent.h>
df2244e3 45#include <sys/sysunion.h>
984263bc 46#include <sys/proc.h>
895c1f85 47#include <sys/priv.h>
984263bc
MD
48#include <sys/time.h>
49#include <sys/vnode.h>
a94976ad 50#include <sys/sysctl.h>
b3ce8a64 51#include <sys/kern_syscall.h>
984263bc
MD
52#include <vm/vm.h>
53#include <vm/vm_extern.h>
684a93c4 54
245e4f17 55#include <sys/msgport2.h>
88c4d2f6 56#include <sys/thread2.h>
684a93c4 57#include <sys/mplock2.h>
984263bc
MD
58
59struct timezone tz;
60
61/*
62 * Time of day and interval timer support.
63 *
64 * These routines provide the kernel entry points to get and set
65 * the time-of-day and per-process interval timers. Subroutines
66 * here provide support for adding and subtracting timeval structures
67 * and decrementing interval timers, optionally reloading the interval
68 * timers when they expire.
69 */
70
b3ce8a64
MD
71static int settime(struct timeval *);
72static void timevalfix(struct timeval *);
984263bc 73
3b58baa0
MD
74/*
75 * Nanosleep tries very hard to sleep for a precisely requested time
76 * interval, down to 1uS. The administrator can impose a minimum delay
77 * and a delay below which we hard-loop instead of initiate a timer
78 * interrupt and sleep.
79 *
80 * For machines under high loads it might be beneficial to increase min_us
81 * to e.g. 1000uS (1ms) so spining processes sleep meaningfully.
82 */
83static int nanosleep_min_us = 10;
84static int nanosleep_hard_us = 100;
85SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
86 &nanosleep_min_us, 0, "")
87SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
88 &nanosleep_hard_us, 0, "")
a94976ad 89
984263bc 90static int
c972a82f 91settime(struct timeval *tv)
984263bc
MD
92{
93 struct timeval delta, tv1, tv2;
94 static struct timeval maxtime, laststep;
95 struct timespec ts;
33924148
JS
96 int origcpu;
97
157202af 98 if ((origcpu = mycpu->gd_cpuid) != 0)
33924148 99 lwkt_setcpu_self(globaldata_find(0));
984263bc 100
88c4d2f6 101 crit_enter();
984263bc
MD
102 microtime(&tv1);
103 delta = *tv;
104 timevalsub(&delta, &tv1);
105
106 /*
107 * If the system is secure, we do not allow the time to be
108 * set to a value earlier than 1 second less than the highest
109 * time we have yet seen. The worst a miscreant can do in
110 * this circumstance is "freeze" time. He couldn't go
111 * back to the past.
112 *
113 * We similarly do not allow the clock to be stepped more
114 * than one second, nor more than once per second. This allows
115 * a miscreant to make the clock march double-time, but no worse.
116 */
117 if (securelevel > 1) {
118 if (delta.tv_sec < 0 || delta.tv_usec < 0) {
119 /*
120 * Update maxtime to latest time we've seen.
121 */
122 if (tv1.tv_sec > maxtime.tv_sec)
123 maxtime = tv1;
124 tv2 = *tv;
125 timevalsub(&tv2, &maxtime);
126 if (tv2.tv_sec < -1) {
127 tv->tv_sec = maxtime.tv_sec - 1;
6ea70f76 128 kprintf("Time adjustment clamped to -1 second\n");
984263bc
MD
129 }
130 } else {
131 if (tv1.tv_sec == laststep.tv_sec) {
88c4d2f6 132 crit_exit();
984263bc
MD
133 return (EPERM);
134 }
135 if (delta.tv_sec > 1) {
136 tv->tv_sec = tv1.tv_sec + 1;
6ea70f76 137 kprintf("Time adjustment clamped to +1 second\n");
984263bc
MD
138 }
139 laststep = *tv;
140 }
141 }
142
143 ts.tv_sec = tv->tv_sec;
144 ts.tv_nsec = tv->tv_usec * 1000;
88c4d2f6 145 set_timeofday(&ts);
88c4d2f6 146 crit_exit();
33924148 147
157202af 148 if (origcpu != 0)
33924148 149 lwkt_setcpu_self(globaldata_find(origcpu));
33924148 150
984263bc
MD
151 resettodr();
152 return (0);
153}
154
3919ced0
MD
155/*
156 * MPSAFE
157 */
984263bc 158int
b3ce8a64 159kern_clock_gettime(clockid_t clock_id, struct timespec *ats)
984263bc 160{
b3ce8a64 161 int error = 0;
91810a6f 162 struct proc *p;
984263bc 163
b3ce8a64 164 switch(clock_id) {
26be1876 165 case CLOCK_REALTIME:
91810a6f 166 case CLOCK_REALTIME_PRECISE:
b3ce8a64
MD
167 nanotime(ats);
168 break;
91810a6f
MD
169 case CLOCK_REALTIME_FAST:
170 getnanotime(ats);
171 break;
26be1876 172 case CLOCK_MONOTONIC:
91810a6f
MD
173 case CLOCK_MONOTONIC_PRECISE:
174 case CLOCK_UPTIME:
175 case CLOCK_UPTIME_PRECISE:
b3ce8a64
MD
176 nanouptime(ats);
177 break;
91810a6f
MD
178 case CLOCK_MONOTONIC_FAST:
179 case CLOCK_UPTIME_FAST:
180 getnanouptime(ats);
181 break;
182 case CLOCK_VIRTUAL:
183 p = curproc;
184 ats->tv_sec = p->p_timer[ITIMER_VIRTUAL].it_value.tv_sec;
185 ats->tv_nsec = p->p_timer[ITIMER_VIRTUAL].it_value.tv_usec *
186 1000;
187 break;
188 case CLOCK_PROF:
189 p = curproc;
190 ats->tv_sec = p->p_timer[ITIMER_PROF].it_value.tv_sec;
191 ats->tv_nsec = p->p_timer[ITIMER_PROF].it_value.tv_usec *
192 1000;
193 break;
194 case CLOCK_SECOND:
195 ats->tv_sec = time_second;
196 ats->tv_nsec = 0;
197 break;
26be1876 198 default:
b3ce8a64
MD
199 error = EINVAL;
200 break;
26be1876 201 }
b3ce8a64 202 return (error);
984263bc
MD
203}
204
3919ced0
MD
205/*
206 * MPSAFE
207 */
984263bc 208int
b3ce8a64
MD
209sys_clock_gettime(struct clock_gettime_args *uap)
210{
211 struct timespec ats;
212 int error;
213
214 error = kern_clock_gettime(uap->clock_id, &ats);
215 if (error == 0)
216 error = copyout(&ats, uap->tp, sizeof(ats));
217
218 return (error);
219}
220
221int
222kern_clock_settime(clockid_t clock_id, struct timespec *ats)
984263bc 223{
dadab5e9 224 struct thread *td = curthread;
984263bc 225 struct timeval atv;
984263bc
MD
226 int error;
227
cc125f38 228 if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
984263bc 229 return (error);
b3ce8a64 230 if (clock_id != CLOCK_REALTIME)
984263bc 231 return (EINVAL);
b3ce8a64
MD
232 if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
233 return (EINVAL);
234
235 TIMESPEC_TO_TIMEVAL(&atv, ats);
236 error = settime(&atv);
237 return (error);
984263bc
MD
238}
239
3919ced0
MD
240/*
241 * MPALMOSTSAFE
242 */
984263bc 243int
b3ce8a64 244sys_clock_settime(struct clock_settime_args *uap)
984263bc 245{
b3ce8a64
MD
246 struct timespec ats;
247 int error;
248
249 if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
250 return (error);
251
3919ced0
MD
252 get_mplock();
253 error = kern_clock_settime(uap->clock_id, &ats);
254 rel_mplock();
255 return (error);
b3ce8a64
MD
256}
257
3919ced0
MD
258/*
259 * MPSAFE
260 */
b3ce8a64
MD
261int
262kern_clock_getres(clockid_t clock_id, struct timespec *ts)
263{
264 int error;
984263bc 265
b3ce8a64 266 switch(clock_id) {
26be1876 267 case CLOCK_REALTIME:
91810a6f
MD
268 case CLOCK_REALTIME_FAST:
269 case CLOCK_REALTIME_PRECISE:
26be1876 270 case CLOCK_MONOTONIC:
91810a6f
MD
271 case CLOCK_MONOTONIC_FAST:
272 case CLOCK_MONOTONIC_PRECISE:
273 case CLOCK_UPTIME:
274 case CLOCK_UPTIME_FAST:
275 case CLOCK_UPTIME_PRECISE:
984263bc 276 /*
26be1876
MD
277 * Round up the result of the division cheaply
278 * by adding 1. Rounding up is especially important
279 * if rounding down would give 0. Perfect rounding
280 * is unimportant.
984263bc 281 */
b3ce8a64
MD
282 ts->tv_sec = 0;
283 ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1;
284 error = 0;
285 break;
91810a6f
MD
286 case CLOCK_VIRTUAL:
287 case CLOCK_PROF:
288 /* Accurately round up here because we can do so cheaply. */
289 ts->tv_sec = 0;
290 ts->tv_nsec = (1000000000 + hz - 1) / hz;
291 error = 0;
292 break;
293 case CLOCK_SECOND:
294 ts->tv_sec = 1;
295 ts->tv_nsec = 0;
296 error = 0;
297 break;
26be1876 298 default:
b3ce8a64
MD
299 error = EINVAL;
300 break;
984263bc 301 }
b3ce8a64
MD
302
303 return(error);
304}
305
3919ced0
MD
306/*
307 * MPSAFE
308 */
b3ce8a64
MD
309int
310sys_clock_getres(struct clock_getres_args *uap)
311{
312 int error;
313 struct timespec ts;
314
315 error = kern_clock_getres(uap->clock_id, &ts);
316 if (error == 0)
317 error = copyout(&ts, uap->tp, sizeof(ts));
318
319 return (error);
984263bc
MD
320}
321
88c4d2f6
MD
322/*
323 * nanosleep1()
324 *
325 * This is a general helper function for nanosleep() (aka sleep() aka
326 * usleep()).
327 *
328 * If there is less then one tick's worth of time left and
329 * we haven't done a yield, or the remaining microseconds is
330 * ridiculously low, do a yield. This avoids having
331 * to deal with systimer overheads when the system is under
332 * heavy loads. If we have done a yield already then use
333 * a systimer and an uninterruptable thread wait.
334 *
335 * If there is more then a tick's worth of time left,
336 * calculate the baseline ticks and use an interruptable
337 * tsleep, then handle the fine-grained delay on the next
338 * loop. This usually results in two sleeps occuring, a long one
339 * and a short one.
3919ced0
MD
340 *
341 * MPSAFE
88c4d2f6
MD
342 */
343static void
96d52ac8
SZ
344ns1_systimer(systimer_t info, int in_ipi __unused,
345 struct intrframe *frame __unused)
88c4d2f6
MD
346{
347 lwkt_schedule(info->data);
348}
984263bc 349
8ba5f7ef 350int
41c20dac 351nanosleep1(struct timespec *rqt, struct timespec *rmt)
984263bc 352{
88c4d2f6 353 static int nanowait;
984263bc
MD
354 struct timespec ts, ts2, ts3;
355 struct timeval tv;
356 int error;
357
358 if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
359 return (EINVAL);
8ba5f7ef 360 /* XXX: imho this should return EINVAL at least for tv_sec < 0 */
984263bc
MD
361 if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
362 return (0);
a94976ad
MD
363 nanouptime(&ts);
364 timespecadd(&ts, rqt); /* ts = target timestamp compare */
365 TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
88c4d2f6 366
984263bc 367 for (;;) {
88c4d2f6
MD
368 int ticks;
369 struct systimer info;
370
a591f597 371 ticks = tv.tv_usec / ustick; /* approximate */
a94976ad 372
88c4d2f6 373 if (tv.tv_sec == 0 && ticks == 0) {
37af14fe 374 thread_t td = curthread;
3b58baa0
MD
375 if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
376 tv.tv_usec = nanosleep_min_us;
377 if (tv.tv_usec < nanosleep_hard_us) {
f9235b6d 378 lwkt_user_yield();
3b58baa0 379 cpu_pause();
a94976ad 380 } else {
37af14fe 381 crit_enter_quick(td);
88c4d2f6 382 systimer_init_oneshot(&info, ns1_systimer,
37af14fe
MD
383 td, tv.tv_usec);
384 lwkt_deschedule_self(td);
385 crit_exit_quick(td);
88c4d2f6
MD
386 lwkt_switch();
387 systimer_del(&info); /* make sure it's gone */
a94976ad 388 }
08f2f1bb 389 error = iscaught(td->td_lwp);
88c4d2f6
MD
390 } else if (tv.tv_sec == 0) {
391 error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
a94976ad 392 } else {
88c4d2f6
MD
393 ticks = tvtohz_low(&tv); /* also handles overflow */
394 error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
a94976ad
MD
395 }
396 nanouptime(&ts2);
88c4d2f6 397 if (error && error != EWOULDBLOCK) {
984263bc
MD
398 if (error == ERESTART)
399 error = EINTR;
400 if (rmt != NULL) {
401 timespecsub(&ts, &ts2);
402 if (ts.tv_sec < 0)
403 timespecclear(&ts);
404 *rmt = ts;
405 }
406 return (error);
407 }
408 if (timespeccmp(&ts2, &ts, >=))
409 return (0);
410 ts3 = ts;
411 timespecsub(&ts3, &ts2);
412 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
413 }
414}
415
3919ced0
MD
416/*
417 * MPSAFE
418 */
984263bc 419int
753fd850 420sys_nanosleep(struct nanosleep_args *uap)
984263bc 421{
245e4f17 422 int error;
f9a13fc4
MD
423 struct timespec rqt;
424 struct timespec rmt;
984263bc 425
f9a13fc4 426 error = copyin(uap->rqtp, &rqt, sizeof(rqt));
984263bc
MD
427 if (error)
428 return (error);
f9a13fc4
MD
429
430 error = nanosleep1(&rqt, &rmt);
431
245e4f17 432 /*
f9a13fc4 433 * copyout the residual if nanosleep was interrupted.
245e4f17 434 */
55d25c87
SS
435 if (error && uap->rmtp) {
436 int error2;
437
438 error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
439 if (error2)
440 error = error2;
441 }
984263bc
MD
442 return (error);
443}
444
3919ced0
MD
445/*
446 * MPSAFE
447 */
984263bc 448int
753fd850 449sys_gettimeofday(struct gettimeofday_args *uap)
984263bc
MD
450{
451 struct timeval atv;
452 int error = 0;
453
454 if (uap->tp) {
455 microtime(&atv);
456 if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
457 sizeof (atv))))
458 return (error);
459 }
460 if (uap->tzp)
461 error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
462 sizeof (tz));
463 return (error);
464}
465
3919ced0
MD
466/*
467 * MPALMOSTSAFE
468 */
984263bc 469int
753fd850 470sys_settimeofday(struct settimeofday_args *uap)
984263bc 471{
dadab5e9 472 struct thread *td = curthread;
984263bc
MD
473 struct timeval atv;
474 struct timezone atz;
475 int error;
476
cc125f38 477 if ((error = priv_check(td, PRIV_SETTIMEOFDAY)))
984263bc 478 return (error);
f59ccd43
MD
479 /*
480 * Verify all parameters before changing time.
481 *
482 * NOTE: We do not allow the time to be set to 0.0, which also by
483 * happy coincidence works around a pkgsrc bulk build bug.
484 */
984263bc
MD
485 if (uap->tv) {
486 if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
487 sizeof(atv))))
488 return (error);
489 if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
490 return (EINVAL);
f59ccd43
MD
491 if (atv.tv_sec == 0 && atv.tv_usec == 0)
492 return (EINVAL);
984263bc
MD
493 }
494 if (uap->tzp &&
495 (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
496 return (error);
3919ced0
MD
497
498 get_mplock();
499 if (uap->tv && (error = settime(&atv))) {
500 rel_mplock();
984263bc 501 return (error);
3919ced0
MD
502 }
503 rel_mplock();
984263bc
MD
504 if (uap->tzp)
505 tz = atz;
506 return (0);
507}
508
4026c000
JS
509static void
510kern_adjtime_common(void)
511{
512 if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
7df7080b 513 (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta))
4026c000
JS
514 ntp_tick_delta = ntp_delta;
515 else if (ntp_delta > ntp_big_delta)
516 ntp_tick_delta = 10 * ntp_default_tick_delta;
517 else if (ntp_delta < -ntp_big_delta)
518 ntp_tick_delta = -10 * ntp_default_tick_delta;
519 else if (ntp_delta > 0)
520 ntp_tick_delta = ntp_default_tick_delta;
521 else
522 ntp_tick_delta = -ntp_default_tick_delta;
523}
524
525void
526kern_adjtime(int64_t delta, int64_t *odelta)
527{
528 int origcpu;
529
157202af 530 if ((origcpu = mycpu->gd_cpuid) != 0)
4026c000 531 lwkt_setcpu_self(globaldata_find(0));
4026c000
JS
532
533 crit_enter();
534 *odelta = ntp_delta;
08f95c49 535 ntp_delta = delta;
4026c000
JS
536 kern_adjtime_common();
537 crit_exit();
538
157202af 539 if (origcpu != 0)
4026c000 540 lwkt_setcpu_self(globaldata_find(origcpu));
4026c000
JS
541}
542
b6da4cbb
JS
543static void
544kern_get_ntp_delta(int64_t *delta)
545{
546 int origcpu;
547
548 if ((origcpu = mycpu->gd_cpuid) != 0)
549 lwkt_setcpu_self(globaldata_find(0));
550
551 crit_enter();
552 *delta = ntp_delta;
553 crit_exit();
554
555 if (origcpu != 0)
556 lwkt_setcpu_self(globaldata_find(origcpu));
557}
558
4026c000
JS
559void
560kern_reladjtime(int64_t delta)
561{
562 int origcpu;
563
157202af 564 if ((origcpu = mycpu->gd_cpuid) != 0)
4026c000 565 lwkt_setcpu_self(globaldata_find(0));
4026c000
JS
566
567 crit_enter();
568 ntp_delta += delta;
569 kern_adjtime_common();
570 crit_exit();
571
157202af 572 if (origcpu != 0)
4026c000 573 lwkt_setcpu_self(globaldata_find(origcpu));
4026c000 574}
984263bc 575
0143455b
JS
576static void
577kern_adjfreq(int64_t rate)
578{
579 int origcpu;
580
157202af 581 if ((origcpu = mycpu->gd_cpuid) != 0)
0143455b 582 lwkt_setcpu_self(globaldata_find(0));
0143455b
JS
583
584 crit_enter();
585 ntp_tick_permanent = rate;
586 crit_exit();
587
157202af 588 if (origcpu != 0)
0143455b 589 lwkt_setcpu_self(globaldata_find(origcpu));
0143455b
JS
590}
591
3919ced0
MD
592/*
593 * MPALMOSTSAFE
594 */
984263bc 595int
753fd850 596sys_adjtime(struct adjtime_args *uap)
984263bc 597{
dadab5e9 598 struct thread *td = curthread;
984263bc 599 struct timeval atv;
4026c000 600 int64_t ndelta, odelta;
88c4d2f6 601 int error;
984263bc 602
cc125f38 603 if ((error = priv_check(td, PRIV_ADJTIME)))
984263bc 604 return (error);
3919ced0
MD
605 error = copyin(uap->delta, &atv, sizeof(struct timeval));
606 if (error)
984263bc
MD
607 return (error);
608
609 /*
610 * Compute the total correction and the rate at which to apply it.
611 * Round the adjustment down to a whole multiple of the per-tick
612 * delta, so that after some number of incremental changes in
613 * hardclock(), tickdelta will become zero, lest the correction
614 * overshoot and start taking us away from the desired final time.
615 */
08f95c49 616 ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
3919ced0 617 get_mplock();
4026c000 618 kern_adjtime(ndelta, &odelta);
3919ced0 619 rel_mplock();
984263bc
MD
620
621 if (uap->olddelta) {
4026c000 622 atv.tv_sec = odelta / 1000000000;
a821f7fc 623 atv.tv_usec = odelta % 1000000000 / 1000;
3919ced0 624 copyout(&atv, uap->olddelta, sizeof(struct timeval));
984263bc
MD
625 }
626 return (0);
627}
628
4026c000
JS
629static int
630sysctl_adjtime(SYSCTL_HANDLER_ARGS)
631{
632 int64_t delta;
633 int error;
634
4026c000 635 if (req->newptr != NULL) {
895c1f85 636 if (priv_check(curthread, PRIV_ROOT))
4026c000
JS
637 return (EPERM);
638 error = SYSCTL_IN(req, &delta, sizeof(delta));
639 if (error)
640 return (error);
641 kern_reladjtime(delta);
642 }
5eb5a6bc
MD
643
644 if (req->oldptr)
645 kern_get_ntp_delta(&delta);
646 error = SYSCTL_OUT(req, &delta, sizeof(delta));
647 return (error);
4026c000
JS
648}
649
08f95c49
MD
650/*
651 * delta is in nanoseconds.
652 */
0143455b 653static int
b6da4cbb
JS
654sysctl_delta(SYSCTL_HANDLER_ARGS)
655{
656 int64_t delta, old_delta;
657 int error;
658
659 if (req->newptr != NULL) {
895c1f85 660 if (priv_check(curthread, PRIV_ROOT))
b6da4cbb
JS
661 return (EPERM);
662 error = SYSCTL_IN(req, &delta, sizeof(delta));
663 if (error)
664 return (error);
665 kern_adjtime(delta, &old_delta);
b6da4cbb
JS
666 }
667
5eb5a6bc
MD
668 if (req->oldptr != NULL)
669 kern_get_ntp_delta(&old_delta);
cebaad99 670 error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
5eb5a6bc 671 return (error);
b6da4cbb
JS
672}
673
08f95c49
MD
674/*
675 * frequency is in nanoseconds per second shifted left 32.
676 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32.
677 */
b6da4cbb 678static int
0143455b
JS
679sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
680{
681 int64_t freqdelta;
682 int error;
683
0143455b 684 if (req->newptr != NULL) {
895c1f85 685 if (priv_check(curthread, PRIV_ROOT))
0143455b
JS
686 return (EPERM);
687 error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
688 if (error)
689 return (error);
690
691 freqdelta /= hz;
692 kern_adjfreq(freqdelta);
693 }
5eb5a6bc
MD
694
695 if (req->oldptr != NULL)
696 freqdelta = ntp_tick_permanent * hz;
697 error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
698 if (error)
699 return (error);
700
0143455b
JS
701 return (0);
702}
703
4026c000 704SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
5eb5a6bc
MD
705SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
706 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
cebaad99 707 sysctl_adjfreq, "Q", "permanent correction per second");
b6da4cbb 708SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
5eb5a6bc 709 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
cebaad99 710 sysctl_delta, "Q", "one-time delta");
5eb5a6bc
MD
711SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
712 &ntp_big_delta, sizeof(ntp_big_delta), "Q",
713 "threshold for fast adjustment");
714SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
715 &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
716 "per-tick adjustment");
717SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
718 &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
719 "default per-tick adjustment");
48590578 720SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
5eb5a6bc
MD
721 &ntp_leap_second, sizeof(ntp_leap_second), "LU",
722 "next leap second");
48590578
JS
723SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
724 &ntp_leap_insert, 0, "insert or remove leap second");
4026c000 725SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
cebaad99
JS
726 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
727 sysctl_adjtime, "Q", "relative adjust for delta");
4026c000 728
984263bc
MD
729/*
730 * Get value of an interval timer. The process virtual and
731 * profiling virtual time timers are kept in the p_stats area, since
732 * they can be swapped out. These are kept internally in the
733 * way they are specified externally: in time until they expire.
734 *
735 * The real time interval timer is kept in the process table slot
736 * for the process, and its value (it_value) is kept as an
737 * absolute time rather than as a delta, so that it is easy to keep
738 * periodic real-time signals from drifting.
739 *
740 * Virtual time timers are processed in the hardclock() routine of
741 * kern_clock.c. The real time timer is processed by a timeout
742 * routine, called from the softclock() routine. Since a callout
743 * may be delayed in real time due to interrupt processing in the system,
744 * it is possible for the real time timeout routine (realitexpire, given below),
745 * to be delayed in real time past when it is supposed to occur. It
746 * does not suffice, therefore, to reload the real timer .it_value from the
747 * real time timers .it_interval. Rather, we compute the next time in
748 * absolute time the timer should go off.
3919ced0
MD
749 *
750 * MPALMOSTSAFE
984263bc 751 */
984263bc 752int
753fd850 753sys_getitimer(struct getitimer_args *uap)
984263bc 754{
41c20dac 755 struct proc *p = curproc;
984263bc
MD
756 struct timeval ctv;
757 struct itimerval aitv;
984263bc
MD
758
759 if (uap->which > ITIMER_PROF)
760 return (EINVAL);
d7f4c458 761 lwkt_gettoken(&p->p_token);
984263bc
MD
762 if (uap->which == ITIMER_REAL) {
763 /*
764 * Convert from absolute to relative time in .it_value
765 * part of real time timer. If time for real time timer
766 * has passed return 0, else return difference between
767 * current time and time for the timer to go off.
768 */
769 aitv = p->p_realtimer;
770 if (timevalisset(&aitv.it_value)) {
771 getmicrouptime(&ctv);
772 if (timevalcmp(&aitv.it_value, &ctv, <))
773 timevalclear(&aitv.it_value);
774 else
775 timevalsub(&aitv.it_value, &ctv);
776 }
88c4d2f6 777 } else {
93328593 778 aitv = p->p_timer[uap->which];
88c4d2f6 779 }
d7f4c458 780 lwkt_reltoken(&p->p_token);
3919ced0 781 return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
984263bc
MD
782}
783
3919ced0
MD
784/*
785 * MPALMOSTSAFE
786 */
984263bc 787int
753fd850 788sys_setitimer(struct setitimer_args *uap)
984263bc
MD
789{
790 struct itimerval aitv;
791 struct timeval ctv;
41c20dac
MD
792 struct itimerval *itvp;
793 struct proc *p = curproc;
88c4d2f6 794 int error;
984263bc
MD
795
796 if (uap->which > ITIMER_PROF)
797 return (EINVAL);
798 itvp = uap->itv;
799 if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
800 sizeof(struct itimerval))))
801 return (error);
802 if ((uap->itv = uap->oitv) &&
753fd850 803 (error = sys_getitimer((struct getitimer_args *)uap)))
984263bc 804 return (error);
4090d6ff 805 if (itvp == NULL)
984263bc
MD
806 return (0);
807 if (itimerfix(&aitv.it_value))
808 return (EINVAL);
809 if (!timevalisset(&aitv.it_value))
810 timevalclear(&aitv.it_interval);
811 else if (itimerfix(&aitv.it_interval))
812 return (EINVAL);
d7f4c458 813 lwkt_gettoken(&p->p_token);
984263bc
MD
814 if (uap->which == ITIMER_REAL) {
815 if (timevalisset(&p->p_realtimer.it_value))
a471eac5 816 callout_stop_sync(&p->p_ithandle);
984263bc 817 if (timevalisset(&aitv.it_value))
8fbf9130
JS
818 callout_reset(&p->p_ithandle,
819 tvtohz_high(&aitv.it_value), realitexpire, p);
984263bc
MD
820 getmicrouptime(&ctv);
821 timevaladd(&aitv.it_value, &ctv);
822 p->p_realtimer = aitv;
88c4d2f6 823 } else {
93328593 824 p->p_timer[uap->which] = aitv;
898e34b3
MD
825 switch(uap->which) {
826 case ITIMER_VIRTUAL:
4643740a 827 p->p_flags &= ~P_SIGVTALRM;
898e34b3
MD
828 break;
829 case ITIMER_PROF:
4643740a 830 p->p_flags &= ~P_SIGPROF;
898e34b3
MD
831 break;
832 }
88c4d2f6 833 }
d7f4c458 834 lwkt_reltoken(&p->p_token);
984263bc
MD
835 return (0);
836}
837
838/*
839 * Real interval timer expired:
840 * send process whose timer expired an alarm signal.
841 * If time is not set up to reload, then just return.
842 * Else compute next time timer should go off which is > current time.
843 * This is where delay in processing this timeout causes multiple
844 * SIGALRM calls to be compressed into one.
a94976ad 845 * tvtohz_high() always adds 1 to allow for the time until the next clock
984263bc
MD
846 * interrupt being strictly less than 1 clock tick, but we don't want
847 * that here since we want to appear to be in sync with the clock
848 * interrupt even when we're delayed.
849 */
850void
c972a82f 851realitexpire(void *arg)
984263bc 852{
1fd87d54 853 struct proc *p;
984263bc 854 struct timeval ctv, ntv;
984263bc
MD
855
856 p = (struct proc *)arg;
a471eac5 857 PHOLD(p);
d7f4c458 858 lwkt_gettoken(&p->p_token);
84204577 859 ksignal(p, SIGALRM);
984263bc
MD
860 if (!timevalisset(&p->p_realtimer.it_interval)) {
861 timevalclear(&p->p_realtimer.it_value);
a471eac5 862 goto done;
984263bc
MD
863 }
864 for (;;) {
984263bc 865 timevaladd(&p->p_realtimer.it_value,
d7f4c458 866 &p->p_realtimer.it_interval);
984263bc
MD
867 getmicrouptime(&ctv);
868 if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
869 ntv = p->p_realtimer.it_value;
870 timevalsub(&ntv, &ctv);
8fbf9130
JS
871 callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
872 realitexpire, p);
a471eac5 873 goto done;
984263bc 874 }
984263bc 875 }
a471eac5 876done:
d7f4c458 877 lwkt_reltoken(&p->p_token);
a471eac5 878 PRELE(p);
984263bc
MD
879}
880
881/*
882 * Check that a proposed value to load into the .it_value or
883 * .it_interval part of an interval timer is acceptable, and
884 * fix it to have at least minimal value (i.e. if it is less
885 * than the resolution of the clock, round it up.)
3919ced0
MD
886 *
887 * MPSAFE
984263bc
MD
888 */
889int
c972a82f 890itimerfix(struct timeval *tv)
984263bc
MD
891{
892
893 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
894 tv->tv_usec < 0 || tv->tv_usec >= 1000000)
895 return (EINVAL);
a591f597
MD
896 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick)
897 tv->tv_usec = ustick;
984263bc
MD
898 return (0);
899}
900
/*
 * Decrement an interval timer by usec microseconds (must be < 1000000).
 * When the timer expires and has a reload interval, the overshoot
 * (usec minus the old value) is carried into the reloaded value so the
 * timer does not drift.  Assumes the timer cannot change concurrently.
 *
 * Returns 1 while the timer is still running, 0 once it has expired.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
	struct timeval *val = &itp->it_value;

	if (val->tv_usec < usec) {
		if (val->tv_sec == 0) {
			/* expired, and already in next interval */
			usec -= val->tv_usec;
			goto expire;
		}
		/* borrow a second */
		val->tv_usec += 1000000;
		val->tv_sec--;
	}
	val->tv_usec -= usec;
	usec = 0;
	if (val->tv_sec != 0 || val->tv_usec != 0)
		return (1);
	/* expired, exactly at end of interval */
expire:
	if (itp->it_interval.tv_sec != 0 || itp->it_interval.tv_usec != 0) {
		/* reload, carrying the overshoot to avoid drift */
		*val = itp->it_interval;
		val->tv_usec -= usec;
		if (val->tv_usec < 0) {
			val->tv_usec += 1000000;
			val->tv_sec--;
		}
	} else {
		val->tv_usec = 0;	/* sec is already 0 */
	}
	return (0);
}
941
942/*
943 * Add and subtract routines for timevals.
944 * N.B.: subtract routine doesn't deal with
945 * results which are before the beginning,
946 * it just gets very confused in this case.
947 * Caveat emptor.
948 */
949void
9deadd02 950timevaladd(struct timeval *t1, const struct timeval *t2)
984263bc
MD
951{
952
953 t1->tv_sec += t2->tv_sec;
954 t1->tv_usec += t2->tv_usec;
955 timevalfix(t1);
956}
957
958void
9deadd02 959timevalsub(struct timeval *t1, const struct timeval *t2)
984263bc
MD
960{
961
962 t1->tv_sec -= t2->tv_sec;
963 t1->tv_usec -= t2->tv_usec;
964 timevalfix(t1);
965}
966
967static void
c972a82f 968timevalfix(struct timeval *t1)
984263bc
MD
969{
970
971 if (t1->tv_usec < 0) {
972 t1->tv_sec--;
973 t1->tv_usec += 1000000;
974 }
975 if (t1->tv_usec >= 1000000) {
976 t1->tv_sec++;
977 t1->tv_usec -= 1000000;
978 }
979}
cea4446f
HP
980
981/*
982 * ratecheck(): simple time-based rate-limit checking.
983 */
984int
985ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
986{
987 struct timeval tv, delta;
988 int rv = 0;
989
990 getmicrouptime(&tv); /* NB: 10ms precision */
991 delta = tv;
992 timevalsub(&delta, lasttime);
993
994 /*
995 * check for 0,0 is so that the message will be seen at least once,
996 * even if interval is huge.
997 */
998 if (timevalcmp(&delta, mininterval, >=) ||
999 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
1000 *lasttime = tv;
1001 rv = 1;
1002 }
1003
1004 return (rv);
1005}
1006
1007/*
1008 * ppsratecheck(): packets (or events) per second limitation.
1009 *
1010 * Return 0 if the limit is to be enforced (e.g. the caller
1011 * should drop a packet because of the rate limitation).
1012 *
1013 * maxpps of 0 always causes zero to be returned. maxpps of -1
1014 * always causes 1 to be returned; this effectively defeats rate
1015 * limiting.
1016 *
1017 * Note that we maintain the struct timeval for compatibility
1018 * with other bsd systems. We reuse the storage and just monitor
1019 * clock ticks for minimal overhead.
1020 */
1021int
1022ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
1023{
1024 int now;
1025
1026 /*
1027 * Reset the last time and counter if this is the first call
1028 * or more than a second has passed since the last update of
1029 * lasttime.
1030 */
1031 now = ticks;
1032 if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
1033 lasttime->tv_sec = now;
1034 *curpps = 1;
1035 return (maxpps != 0);
1036 } else {
1037 (*curpps)++; /* NB: ignore potential overflow */
1038 return (maxpps < 0 || *curpps < maxpps);
1039 }
1040}
1041