kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / kern / kern_time.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)kern_time.c 8.1 (Berkeley) 6/10/93
34 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/buf.h>
40#include <sys/sysproto.h>
41#include <sys/resourcevar.h>
42#include <sys/signalvar.h>
43#include <sys/kernel.h>
984263bc 44#include <sys/sysent.h>
df2244e3 45#include <sys/sysunion.h>
984263bc 46#include <sys/proc.h>
895c1f85 47#include <sys/priv.h>
984263bc
MD
48#include <sys/time.h>
49#include <sys/vnode.h>
a94976ad 50#include <sys/sysctl.h>
b3ce8a64 51#include <sys/kern_syscall.h>
984263bc
MD
52#include <vm/vm.h>
53#include <vm/vm_extern.h>
684a93c4 54
245e4f17 55#include <sys/msgport2.h>
88c4d2f6 56#include <sys/thread2.h>
684a93c4 57#include <sys/mplock2.h>
984263bc
MD
58
59struct timezone tz;
60
61/*
62 * Time of day and interval timer support.
63 *
64 * These routines provide the kernel entry points to get and set
65 * the time-of-day and per-process interval timers. Subroutines
66 * here provide support for adding and subtracting timeval structures
67 * and decrementing interval timers, optionally reloading the interval
68 * timers when they expire.
69 */
70
b3ce8a64
MD
71static int settime(struct timeval *);
72static void timevalfix(struct timeval *);
984263bc 73
3b58baa0
MD
74/*
75 * Nanosleep tries very hard to sleep for a precisely requested time
76 * interval, down to 1uS. The administrator can impose a minimum delay
77 * and a delay below which we hard-loop instead of initiate a timer
78 * interrupt and sleep.
79 *
80 * For machines under high loads it might be beneficial to increase min_us
81 * to e.g. 1000uS (1ms) so spining processes sleep meaningfully.
82 */
83static int nanosleep_min_us = 10;
84static int nanosleep_hard_us = 100;
85SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
86 &nanosleep_min_us, 0, "")
87SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
88 &nanosleep_hard_us, 0, "")
a94976ad 89
984263bc 90static int
c972a82f 91settime(struct timeval *tv)
984263bc
MD
92{
93 struct timeval delta, tv1, tv2;
94 static struct timeval maxtime, laststep;
95 struct timespec ts;
33924148
JS
96 int origcpu;
97
157202af 98 if ((origcpu = mycpu->gd_cpuid) != 0)
33924148 99 lwkt_setcpu_self(globaldata_find(0));
984263bc 100
88c4d2f6 101 crit_enter();
984263bc
MD
102 microtime(&tv1);
103 delta = *tv;
104 timevalsub(&delta, &tv1);
105
106 /*
107 * If the system is secure, we do not allow the time to be
108 * set to a value earlier than 1 second less than the highest
109 * time we have yet seen. The worst a miscreant can do in
110 * this circumstance is "freeze" time. He couldn't go
111 * back to the past.
112 *
113 * We similarly do not allow the clock to be stepped more
114 * than one second, nor more than once per second. This allows
115 * a miscreant to make the clock march double-time, but no worse.
116 */
117 if (securelevel > 1) {
118 if (delta.tv_sec < 0 || delta.tv_usec < 0) {
119 /*
120 * Update maxtime to latest time we've seen.
121 */
122 if (tv1.tv_sec > maxtime.tv_sec)
123 maxtime = tv1;
124 tv2 = *tv;
125 timevalsub(&tv2, &maxtime);
126 if (tv2.tv_sec < -1) {
127 tv->tv_sec = maxtime.tv_sec - 1;
6ea70f76 128 kprintf("Time adjustment clamped to -1 second\n");
984263bc
MD
129 }
130 } else {
131 if (tv1.tv_sec == laststep.tv_sec) {
88c4d2f6 132 crit_exit();
984263bc
MD
133 return (EPERM);
134 }
135 if (delta.tv_sec > 1) {
136 tv->tv_sec = tv1.tv_sec + 1;
6ea70f76 137 kprintf("Time adjustment clamped to +1 second\n");
984263bc
MD
138 }
139 laststep = *tv;
140 }
141 }
142
143 ts.tv_sec = tv->tv_sec;
144 ts.tv_nsec = tv->tv_usec * 1000;
88c4d2f6 145 set_timeofday(&ts);
88c4d2f6 146 crit_exit();
33924148 147
157202af 148 if (origcpu != 0)
33924148 149 lwkt_setcpu_self(globaldata_find(origcpu));
33924148 150
984263bc
MD
151 resettodr();
152 return (0);
153}
154
3919ced0
MD
155/*
156 * MPSAFE
157 */
984263bc 158int
b3ce8a64 159kern_clock_gettime(clockid_t clock_id, struct timespec *ats)
984263bc 160{
b3ce8a64 161 int error = 0;
984263bc 162
b3ce8a64 163 switch(clock_id) {
26be1876 164 case CLOCK_REALTIME:
b3ce8a64
MD
165 nanotime(ats);
166 break;
26be1876 167 case CLOCK_MONOTONIC:
b3ce8a64
MD
168 nanouptime(ats);
169 break;
26be1876 170 default:
b3ce8a64
MD
171 error = EINVAL;
172 break;
26be1876 173 }
b3ce8a64 174 return (error);
984263bc
MD
175}
176
3919ced0
MD
177/*
178 * MPSAFE
179 */
984263bc 180int
b3ce8a64
MD
181sys_clock_gettime(struct clock_gettime_args *uap)
182{
183 struct timespec ats;
184 int error;
185
186 error = kern_clock_gettime(uap->clock_id, &ats);
187 if (error == 0)
188 error = copyout(&ats, uap->tp, sizeof(ats));
189
190 return (error);
191}
192
193int
194kern_clock_settime(clockid_t clock_id, struct timespec *ats)
984263bc 195{
dadab5e9 196 struct thread *td = curthread;
984263bc 197 struct timeval atv;
984263bc
MD
198 int error;
199
cc125f38 200 if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
984263bc 201 return (error);
b3ce8a64 202 if (clock_id != CLOCK_REALTIME)
984263bc 203 return (EINVAL);
b3ce8a64
MD
204 if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
205 return (EINVAL);
206
207 TIMESPEC_TO_TIMEVAL(&atv, ats);
208 error = settime(&atv);
209 return (error);
984263bc
MD
210}
211
3919ced0
MD
212/*
213 * MPALMOSTSAFE
214 */
984263bc 215int
b3ce8a64 216sys_clock_settime(struct clock_settime_args *uap)
984263bc 217{
b3ce8a64
MD
218 struct timespec ats;
219 int error;
220
221 if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
222 return (error);
223
3919ced0
MD
224 get_mplock();
225 error = kern_clock_settime(uap->clock_id, &ats);
226 rel_mplock();
227 return (error);
b3ce8a64
MD
228}
229
3919ced0
MD
230/*
231 * MPSAFE
232 */
b3ce8a64
MD
233int
234kern_clock_getres(clockid_t clock_id, struct timespec *ts)
235{
236 int error;
984263bc 237
b3ce8a64 238 switch(clock_id) {
26be1876
MD
239 case CLOCK_REALTIME:
240 case CLOCK_MONOTONIC:
984263bc 241 /*
26be1876
MD
242 * Round up the result of the division cheaply
243 * by adding 1. Rounding up is especially important
244 * if rounding down would give 0. Perfect rounding
245 * is unimportant.
984263bc 246 */
b3ce8a64
MD
247 ts->tv_sec = 0;
248 ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1;
249 error = 0;
250 break;
26be1876 251 default:
b3ce8a64
MD
252 error = EINVAL;
253 break;
984263bc 254 }
b3ce8a64
MD
255
256 return(error);
257}
258
3919ced0
MD
259/*
260 * MPSAFE
261 */
b3ce8a64
MD
262int
263sys_clock_getres(struct clock_getres_args *uap)
264{
265 int error;
266 struct timespec ts;
267
268 error = kern_clock_getres(uap->clock_id, &ts);
269 if (error == 0)
270 error = copyout(&ts, uap->tp, sizeof(ts));
271
272 return (error);
984263bc
MD
273}
274
88c4d2f6
MD
275/*
276 * nanosleep1()
277 *
278 * This is a general helper function for nanosleep() (aka sleep() aka
279 * usleep()).
280 *
281 * If there is less then one tick's worth of time left and
282 * we haven't done a yield, or the remaining microseconds is
283 * ridiculously low, do a yield. This avoids having
284 * to deal with systimer overheads when the system is under
285 * heavy loads. If we have done a yield already then use
286 * a systimer and an uninterruptable thread wait.
287 *
288 * If there is more then a tick's worth of time left,
289 * calculate the baseline ticks and use an interruptable
290 * tsleep, then handle the fine-grained delay on the next
291 * loop. This usually results in two sleeps occuring, a long one
292 * and a short one.
3919ced0
MD
293 *
294 * MPSAFE
88c4d2f6
MD
295 */
296static void
96d52ac8
SZ
297ns1_systimer(systimer_t info, int in_ipi __unused,
298 struct intrframe *frame __unused)
88c4d2f6
MD
299{
300 lwkt_schedule(info->data);
301}
984263bc 302
8ba5f7ef 303int
41c20dac 304nanosleep1(struct timespec *rqt, struct timespec *rmt)
984263bc 305{
88c4d2f6 306 static int nanowait;
984263bc
MD
307 struct timespec ts, ts2, ts3;
308 struct timeval tv;
309 int error;
310
311 if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
312 return (EINVAL);
8ba5f7ef 313 /* XXX: imho this should return EINVAL at least for tv_sec < 0 */
984263bc
MD
314 if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
315 return (0);
a94976ad
MD
316 nanouptime(&ts);
317 timespecadd(&ts, rqt); /* ts = target timestamp compare */
318 TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
88c4d2f6 319
984263bc 320 for (;;) {
88c4d2f6
MD
321 int ticks;
322 struct systimer info;
323
a591f597 324 ticks = tv.tv_usec / ustick; /* approximate */
a94976ad 325
88c4d2f6 326 if (tv.tv_sec == 0 && ticks == 0) {
37af14fe 327 thread_t td = curthread;
3b58baa0
MD
328 if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
329 tv.tv_usec = nanosleep_min_us;
330 if (tv.tv_usec < nanosleep_hard_us) {
f9235b6d 331 lwkt_user_yield();
3b58baa0 332 cpu_pause();
a94976ad 333 } else {
37af14fe 334 crit_enter_quick(td);
88c4d2f6 335 systimer_init_oneshot(&info, ns1_systimer,
37af14fe
MD
336 td, tv.tv_usec);
337 lwkt_deschedule_self(td);
338 crit_exit_quick(td);
88c4d2f6
MD
339 lwkt_switch();
340 systimer_del(&info); /* make sure it's gone */
a94976ad 341 }
08f2f1bb 342 error = iscaught(td->td_lwp);
88c4d2f6
MD
343 } else if (tv.tv_sec == 0) {
344 error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
a94976ad 345 } else {
88c4d2f6
MD
346 ticks = tvtohz_low(&tv); /* also handles overflow */
347 error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
a94976ad
MD
348 }
349 nanouptime(&ts2);
88c4d2f6 350 if (error && error != EWOULDBLOCK) {
984263bc
MD
351 if (error == ERESTART)
352 error = EINTR;
353 if (rmt != NULL) {
354 timespecsub(&ts, &ts2);
355 if (ts.tv_sec < 0)
356 timespecclear(&ts);
357 *rmt = ts;
358 }
359 return (error);
360 }
361 if (timespeccmp(&ts2, &ts, >=))
362 return (0);
363 ts3 = ts;
364 timespecsub(&ts3, &ts2);
365 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
366 }
367}
368
3919ced0
MD
369/*
370 * MPSAFE
371 */
984263bc 372int
753fd850 373sys_nanosleep(struct nanosleep_args *uap)
984263bc 374{
245e4f17 375 int error;
f9a13fc4
MD
376 struct timespec rqt;
377 struct timespec rmt;
984263bc 378
f9a13fc4 379 error = copyin(uap->rqtp, &rqt, sizeof(rqt));
984263bc
MD
380 if (error)
381 return (error);
f9a13fc4
MD
382
383 error = nanosleep1(&rqt, &rmt);
384
245e4f17 385 /*
f9a13fc4 386 * copyout the residual if nanosleep was interrupted.
245e4f17 387 */
55d25c87
SS
388 if (error && uap->rmtp) {
389 int error2;
390
391 error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
392 if (error2)
393 error = error2;
394 }
984263bc
MD
395 return (error);
396}
397
3919ced0
MD
398/*
399 * MPSAFE
400 */
984263bc 401int
753fd850 402sys_gettimeofday(struct gettimeofday_args *uap)
984263bc
MD
403{
404 struct timeval atv;
405 int error = 0;
406
407 if (uap->tp) {
408 microtime(&atv);
409 if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
410 sizeof (atv))))
411 return (error);
412 }
413 if (uap->tzp)
414 error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
415 sizeof (tz));
416 return (error);
417}
418
3919ced0
MD
419/*
420 * MPALMOSTSAFE
421 */
984263bc 422int
753fd850 423sys_settimeofday(struct settimeofday_args *uap)
984263bc 424{
dadab5e9 425 struct thread *td = curthread;
984263bc
MD
426 struct timeval atv;
427 struct timezone atz;
428 int error;
429
cc125f38 430 if ((error = priv_check(td, PRIV_SETTIMEOFDAY)))
984263bc
MD
431 return (error);
432 /* Verify all parameters before changing time. */
433 if (uap->tv) {
434 if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
435 sizeof(atv))))
436 return (error);
437 if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
438 return (EINVAL);
439 }
440 if (uap->tzp &&
441 (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
442 return (error);
3919ced0
MD
443
444 get_mplock();
445 if (uap->tv && (error = settime(&atv))) {
446 rel_mplock();
984263bc 447 return (error);
3919ced0
MD
448 }
449 rel_mplock();
984263bc
MD
450 if (uap->tzp)
451 tz = atz;
452 return (0);
453}
454
4026c000
JS
455static void
456kern_adjtime_common(void)
457{
458 if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
7df7080b 459 (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta))
4026c000
JS
460 ntp_tick_delta = ntp_delta;
461 else if (ntp_delta > ntp_big_delta)
462 ntp_tick_delta = 10 * ntp_default_tick_delta;
463 else if (ntp_delta < -ntp_big_delta)
464 ntp_tick_delta = -10 * ntp_default_tick_delta;
465 else if (ntp_delta > 0)
466 ntp_tick_delta = ntp_default_tick_delta;
467 else
468 ntp_tick_delta = -ntp_default_tick_delta;
469}
470
471void
472kern_adjtime(int64_t delta, int64_t *odelta)
473{
474 int origcpu;
475
157202af 476 if ((origcpu = mycpu->gd_cpuid) != 0)
4026c000 477 lwkt_setcpu_self(globaldata_find(0));
4026c000
JS
478
479 crit_enter();
480 *odelta = ntp_delta;
08f95c49 481 ntp_delta = delta;
4026c000
JS
482 kern_adjtime_common();
483 crit_exit();
484
157202af 485 if (origcpu != 0)
4026c000 486 lwkt_setcpu_self(globaldata_find(origcpu));
4026c000
JS
487}
488
b6da4cbb
JS
489static void
490kern_get_ntp_delta(int64_t *delta)
491{
492 int origcpu;
493
494 if ((origcpu = mycpu->gd_cpuid) != 0)
495 lwkt_setcpu_self(globaldata_find(0));
496
497 crit_enter();
498 *delta = ntp_delta;
499 crit_exit();
500
501 if (origcpu != 0)
502 lwkt_setcpu_self(globaldata_find(origcpu));
503}
504
4026c000
JS
505void
506kern_reladjtime(int64_t delta)
507{
508 int origcpu;
509
157202af 510 if ((origcpu = mycpu->gd_cpuid) != 0)
4026c000 511 lwkt_setcpu_self(globaldata_find(0));
4026c000
JS
512
513 crit_enter();
514 ntp_delta += delta;
515 kern_adjtime_common();
516 crit_exit();
517
157202af 518 if (origcpu != 0)
4026c000 519 lwkt_setcpu_self(globaldata_find(origcpu));
4026c000 520}
984263bc 521
0143455b
JS
522static void
523kern_adjfreq(int64_t rate)
524{
525 int origcpu;
526
157202af 527 if ((origcpu = mycpu->gd_cpuid) != 0)
0143455b 528 lwkt_setcpu_self(globaldata_find(0));
0143455b
JS
529
530 crit_enter();
531 ntp_tick_permanent = rate;
532 crit_exit();
533
157202af 534 if (origcpu != 0)
0143455b 535 lwkt_setcpu_self(globaldata_find(origcpu));
0143455b
JS
536}
537
3919ced0
MD
538/*
539 * MPALMOSTSAFE
540 */
984263bc 541int
753fd850 542sys_adjtime(struct adjtime_args *uap)
984263bc 543{
dadab5e9 544 struct thread *td = curthread;
984263bc 545 struct timeval atv;
4026c000 546 int64_t ndelta, odelta;
88c4d2f6 547 int error;
984263bc 548
cc125f38 549 if ((error = priv_check(td, PRIV_ADJTIME)))
984263bc 550 return (error);
3919ced0
MD
551 error = copyin(uap->delta, &atv, sizeof(struct timeval));
552 if (error)
984263bc
MD
553 return (error);
554
555 /*
556 * Compute the total correction and the rate at which to apply it.
557 * Round the adjustment down to a whole multiple of the per-tick
558 * delta, so that after some number of incremental changes in
559 * hardclock(), tickdelta will become zero, lest the correction
560 * overshoot and start taking us away from the desired final time.
561 */
08f95c49 562 ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
3919ced0 563 get_mplock();
4026c000 564 kern_adjtime(ndelta, &odelta);
3919ced0 565 rel_mplock();
984263bc
MD
566
567 if (uap->olddelta) {
4026c000 568 atv.tv_sec = odelta / 1000000000;
a821f7fc 569 atv.tv_usec = odelta % 1000000000 / 1000;
3919ced0 570 copyout(&atv, uap->olddelta, sizeof(struct timeval));
984263bc
MD
571 }
572 return (0);
573}
574
4026c000
JS
575static int
576sysctl_adjtime(SYSCTL_HANDLER_ARGS)
577{
578 int64_t delta;
579 int error;
580
4026c000 581 if (req->newptr != NULL) {
895c1f85 582 if (priv_check(curthread, PRIV_ROOT))
4026c000
JS
583 return (EPERM);
584 error = SYSCTL_IN(req, &delta, sizeof(delta));
585 if (error)
586 return (error);
587 kern_reladjtime(delta);
588 }
5eb5a6bc
MD
589
590 if (req->oldptr)
591 kern_get_ntp_delta(&delta);
592 error = SYSCTL_OUT(req, &delta, sizeof(delta));
593 return (error);
4026c000
JS
594}
595
08f95c49
MD
596/*
597 * delta is in nanoseconds.
598 */
0143455b 599static int
b6da4cbb
JS
600sysctl_delta(SYSCTL_HANDLER_ARGS)
601{
602 int64_t delta, old_delta;
603 int error;
604
605 if (req->newptr != NULL) {
895c1f85 606 if (priv_check(curthread, PRIV_ROOT))
b6da4cbb
JS
607 return (EPERM);
608 error = SYSCTL_IN(req, &delta, sizeof(delta));
609 if (error)
610 return (error);
611 kern_adjtime(delta, &old_delta);
b6da4cbb
JS
612 }
613
5eb5a6bc
MD
614 if (req->oldptr != NULL)
615 kern_get_ntp_delta(&old_delta);
cebaad99 616 error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
5eb5a6bc 617 return (error);
b6da4cbb
JS
618}
619
08f95c49
MD
620/*
621 * frequency is in nanoseconds per second shifted left 32.
622 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32.
623 */
b6da4cbb 624static int
0143455b
JS
625sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
626{
627 int64_t freqdelta;
628 int error;
629
0143455b 630 if (req->newptr != NULL) {
895c1f85 631 if (priv_check(curthread, PRIV_ROOT))
0143455b
JS
632 return (EPERM);
633 error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
634 if (error)
635 return (error);
636
637 freqdelta /= hz;
638 kern_adjfreq(freqdelta);
639 }
5eb5a6bc
MD
640
641 if (req->oldptr != NULL)
642 freqdelta = ntp_tick_permanent * hz;
643 error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
644 if (error)
645 return (error);
646
0143455b
JS
647 return (0);
648}
649
4026c000 650SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
5eb5a6bc
MD
651SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
652 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
cebaad99 653 sysctl_adjfreq, "Q", "permanent correction per second");
b6da4cbb 654SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
5eb5a6bc 655 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
cebaad99 656 sysctl_delta, "Q", "one-time delta");
5eb5a6bc
MD
657SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
658 &ntp_big_delta, sizeof(ntp_big_delta), "Q",
659 "threshold for fast adjustment");
660SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
661 &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
662 "per-tick adjustment");
663SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
664 &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
665 "default per-tick adjustment");
48590578 666SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
5eb5a6bc
MD
667 &ntp_leap_second, sizeof(ntp_leap_second), "LU",
668 "next leap second");
48590578
JS
669SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
670 &ntp_leap_insert, 0, "insert or remove leap second");
4026c000 671SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
cebaad99
JS
672 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
673 sysctl_adjtime, "Q", "relative adjust for delta");
4026c000 674
984263bc
MD
675/*
676 * Get value of an interval timer. The process virtual and
677 * profiling virtual time timers are kept in the p_stats area, since
678 * they can be swapped out. These are kept internally in the
679 * way they are specified externally: in time until they expire.
680 *
681 * The real time interval timer is kept in the process table slot
682 * for the process, and its value (it_value) is kept as an
683 * absolute time rather than as a delta, so that it is easy to keep
684 * periodic real-time signals from drifting.
685 *
686 * Virtual time timers are processed in the hardclock() routine of
687 * kern_clock.c. The real time timer is processed by a timeout
688 * routine, called from the softclock() routine. Since a callout
689 * may be delayed in real time due to interrupt processing in the system,
690 * it is possible for the real time timeout routine (realitexpire, given below),
691 * to be delayed in real time past when it is supposed to occur. It
692 * does not suffice, therefore, to reload the real timer .it_value from the
693 * real time timers .it_interval. Rather, we compute the next time in
694 * absolute time the timer should go off.
3919ced0
MD
695 *
696 * MPALMOSTSAFE
984263bc 697 */
984263bc 698int
753fd850 699sys_getitimer(struct getitimer_args *uap)
984263bc 700{
41c20dac 701 struct proc *p = curproc;
984263bc
MD
702 struct timeval ctv;
703 struct itimerval aitv;
984263bc
MD
704
705 if (uap->which > ITIMER_PROF)
706 return (EINVAL);
d7f4c458 707 lwkt_gettoken(&p->p_token);
984263bc
MD
708 if (uap->which == ITIMER_REAL) {
709 /*
710 * Convert from absolute to relative time in .it_value
711 * part of real time timer. If time for real time timer
712 * has passed return 0, else return difference between
713 * current time and time for the timer to go off.
714 */
715 aitv = p->p_realtimer;
716 if (timevalisset(&aitv.it_value)) {
717 getmicrouptime(&ctv);
718 if (timevalcmp(&aitv.it_value, &ctv, <))
719 timevalclear(&aitv.it_value);
720 else
721 timevalsub(&aitv.it_value, &ctv);
722 }
88c4d2f6 723 } else {
93328593 724 aitv = p->p_timer[uap->which];
88c4d2f6 725 }
d7f4c458 726 lwkt_reltoken(&p->p_token);
3919ced0 727 return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
984263bc
MD
728}
729
3919ced0
MD
730/*
731 * MPALMOSTSAFE
732 */
984263bc 733int
753fd850 734sys_setitimer(struct setitimer_args *uap)
984263bc
MD
735{
736 struct itimerval aitv;
737 struct timeval ctv;
41c20dac
MD
738 struct itimerval *itvp;
739 struct proc *p = curproc;
88c4d2f6 740 int error;
984263bc
MD
741
742 if (uap->which > ITIMER_PROF)
743 return (EINVAL);
744 itvp = uap->itv;
745 if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
746 sizeof(struct itimerval))))
747 return (error);
748 if ((uap->itv = uap->oitv) &&
753fd850 749 (error = sys_getitimer((struct getitimer_args *)uap)))
984263bc
MD
750 return (error);
751 if (itvp == 0)
752 return (0);
753 if (itimerfix(&aitv.it_value))
754 return (EINVAL);
755 if (!timevalisset(&aitv.it_value))
756 timevalclear(&aitv.it_interval);
757 else if (itimerfix(&aitv.it_interval))
758 return (EINVAL);
d7f4c458 759 lwkt_gettoken(&p->p_token);
984263bc
MD
760 if (uap->which == ITIMER_REAL) {
761 if (timevalisset(&p->p_realtimer.it_value))
8fbf9130 762 callout_stop(&p->p_ithandle);
984263bc 763 if (timevalisset(&aitv.it_value))
8fbf9130
JS
764 callout_reset(&p->p_ithandle,
765 tvtohz_high(&aitv.it_value), realitexpire, p);
984263bc
MD
766 getmicrouptime(&ctv);
767 timevaladd(&aitv.it_value, &ctv);
768 p->p_realtimer = aitv;
88c4d2f6 769 } else {
93328593 770 p->p_timer[uap->which] = aitv;
898e34b3
MD
771 switch(uap->which) {
772 case ITIMER_VIRTUAL:
4643740a 773 p->p_flags &= ~P_SIGVTALRM;
898e34b3
MD
774 break;
775 case ITIMER_PROF:
4643740a 776 p->p_flags &= ~P_SIGPROF;
898e34b3
MD
777 break;
778 }
88c4d2f6 779 }
d7f4c458 780 lwkt_reltoken(&p->p_token);
984263bc
MD
781 return (0);
782}
783
784/*
785 * Real interval timer expired:
786 * send process whose timer expired an alarm signal.
787 * If time is not set up to reload, then just return.
788 * Else compute next time timer should go off which is > current time.
789 * This is where delay in processing this timeout causes multiple
790 * SIGALRM calls to be compressed into one.
a94976ad 791 * tvtohz_high() always adds 1 to allow for the time until the next clock
984263bc
MD
792 * interrupt being strictly less than 1 clock tick, but we don't want
793 * that here since we want to appear to be in sync with the clock
794 * interrupt even when we're delayed.
795 */
796void
c972a82f 797realitexpire(void *arg)
984263bc 798{
1fd87d54 799 struct proc *p;
984263bc 800 struct timeval ctv, ntv;
984263bc
MD
801
802 p = (struct proc *)arg;
d7f4c458 803 lwkt_gettoken(&p->p_token);
84204577 804 ksignal(p, SIGALRM);
984263bc
MD
805 if (!timevalisset(&p->p_realtimer.it_interval)) {
806 timevalclear(&p->p_realtimer.it_value);
d7f4c458 807 lwkt_reltoken(&p->p_token);
984263bc
MD
808 return;
809 }
810 for (;;) {
984263bc 811 timevaladd(&p->p_realtimer.it_value,
d7f4c458 812 &p->p_realtimer.it_interval);
984263bc
MD
813 getmicrouptime(&ctv);
814 if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
815 ntv = p->p_realtimer.it_value;
816 timevalsub(&ntv, &ctv);
8fbf9130
JS
817 callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
818 realitexpire, p);
d7f4c458 819 lwkt_reltoken(&p->p_token);
984263bc
MD
820 return;
821 }
984263bc 822 }
d7f4c458 823 lwkt_reltoken(&p->p_token);
984263bc
MD
824}
825
826/*
827 * Check that a proposed value to load into the .it_value or
828 * .it_interval part of an interval timer is acceptable, and
829 * fix it to have at least minimal value (i.e. if it is less
830 * than the resolution of the clock, round it up.)
3919ced0
MD
831 *
832 * MPSAFE
984263bc
MD
833 */
834int
c972a82f 835itimerfix(struct timeval *tv)
984263bc
MD
836{
837
838 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
839 tv->tv_usec < 0 || tv->tv_usec >= 1000000)
840 return (EINVAL);
a591f597
MD
841 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick)
842 tv->tv_usec = ustick;
984263bc
MD
843 return (0);
844}
845
/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 *
 * Returns 1 if the timer is still running after the decrement and
 * 0 if it expired (whether or not it was reloaded).
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
	struct timeval *val = &itp->it_value;
	int overshoot;		/* usec by which we ran past the expiry */

	if (val->tv_usec < usec && val->tv_sec == 0) {
		/* expired, and already in next interval */
		overshoot = usec - val->tv_usec;
	} else {
		if (val->tv_usec < usec) {
			/* borrow a second */
			val->tv_usec += 1000000;
			val->tv_sec--;
		}
		val->tv_usec -= usec;
		if (timevalisset(val))
			return (1);
		/* expired, exactly at end of interval */
		overshoot = 0;
	}

	if (timevalisset(&itp->it_interval)) {
		/* reload, shortened by the overshoot */
		*val = itp->it_interval;
		val->tv_usec -= overshoot;
		if (val->tv_usec < 0) {
			val->tv_usec += 1000000;
			val->tv_sec--;
		}
	} else {
		val->tv_usec = 0;		/* sec is already 0 */
	}
	return (0);
}
886
/*
 * Add and subtract routines for timevals.
 * N.B.: the subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */

/*
 * t1 += t2, with the microsecond field carried by timevalfix().
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_usec += t2->tv_usec;
	t1->tv_sec += t2->tv_sec;
	timevalfix(t1);
}
902
/*
 * t1 -= t2, with the microsecond field borrowed by timevalfix().
 */
void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_usec -= t2->tv_usec;
	t1->tv_sec -= t2->tv_sec;
	timevalfix(t1);
}
911
/*
 * Bring tv_usec back toward [0, 1000000) after an add or subtract by
 * applying at most one borrow and at most one carry against tv_sec.
 * Values more than one second out of range are not fully normalized.
 */
static void
timevalfix(struct timeval *t1)
{
	if (t1->tv_usec < 0) {
		/* borrow one second */
		t1->tv_usec += 1000000;
		t1->tv_sec -= 1;
	}
	if (t1->tv_usec >= 1000000) {
		/* carry one second */
		t1->tv_usec -= 1000000;
		t1->tv_sec += 1;
	}
}
cea4446f
HP
925
926/*
927 * ratecheck(): simple time-based rate-limit checking.
928 */
929int
930ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
931{
932 struct timeval tv, delta;
933 int rv = 0;
934
935 getmicrouptime(&tv); /* NB: 10ms precision */
936 delta = tv;
937 timevalsub(&delta, lasttime);
938
939 /*
940 * check for 0,0 is so that the message will be seen at least once,
941 * even if interval is huge.
942 */
943 if (timevalcmp(&delta, mininterval, >=) ||
944 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
945 *lasttime = tv;
946 rv = 1;
947 }
948
949 return (rv);
950}
951
952/*
953 * ppsratecheck(): packets (or events) per second limitation.
954 *
955 * Return 0 if the limit is to be enforced (e.g. the caller
956 * should drop a packet because of the rate limitation).
957 *
958 * maxpps of 0 always causes zero to be returned. maxpps of -1
959 * always causes 1 to be returned; this effectively defeats rate
960 * limiting.
961 *
962 * Note that we maintain the struct timeval for compatibility
963 * with other bsd systems. We reuse the storage and just monitor
964 * clock ticks for minimal overhead.
965 */
966int
967ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
968{
969 int now;
970
971 /*
972 * Reset the last time and counter if this is the first call
973 * or more than a second has passed since the last update of
974 * lasttime.
975 */
976 now = ticks;
977 if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
978 lasttime->tv_sec = now;
979 *curpps = 1;
980 return (maxpps != 0);
981 } else {
982 (*curpps)++; /* NB: ignore potential overflow */
983 return (maxpps < 0 || *curpps < maxpps);
984 }
985}
986