/*-
 * Copyright (c) 2004 Matthew Dillon <dillon@backplane.com>
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $
 * $DragonFly: src/sys/kern/kern_clock.c,v 1.20 2004/06/28 02:57:11 drhodus Exp $
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <sys/timepps.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/limits.h>
#include <machine/smp.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef DEVICE_POLLING
extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks (void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/*
 * Some of these don't belong here, but it's easiest to concentrate them.
 * Note that cp_time[] counts in microseconds, but most userland programs
 * just compare relative times against the total by delta.
 */
long cp_time[CPUSTATES];

SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
    "LU", "CPU time statistics");

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * boottime is used to calculate the 'real' uptime.  Do not confuse this with
 * microuptime().  microtime() is not drift compensated.  The real uptime
 * with compensation is nanotime() - boottime.  boottime is recalculated
 * whenever the real time is set based on the compensated elapsed time
 * in seconds (gd->gd_time_seconds).
 *
 * basetime is used to calculate the compensated real time of day.  Chunky
 * changes to the time, aka settimeofday(), are made by modifying basetime.
 *
 * The gd_time_seconds and gd_cpuclock_base fields remain fairly monotonic.
 * Slight adjustments to gd_cpuclock_base are made to phase-lock it to
 * the real time.
 */
struct timespec boottime;	/* boot time (realtime) for reference only */
struct timespec basetime;	/* base time adjusts uptime -> realtime */
time_t time_second;		/* read-only 'passive' uptime in seconds */
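
/*
 * Illustrative sketch of how the fields above combine.  This mirrors what
 * nanotime() does further down and is not a separate code path; the
 * function name example_realtime is hypothetical.
 */
#if 0
static void
example_realtime(struct timespec *tsp)
{
	nanouptime(tsp);			/* monotonic, from gd_* fields */
	tsp->tv_sec += basetime.tv_sec;		/* settimeofday() offset */
	tsp->tv_nsec += basetime.tv_nsec;
	if (tsp->tv_nsec >= 1000000000) {
		tsp->tv_nsec -= 1000000000;
		++tsp->tv_sec;
	}
	/*
	 * And boottime.tv_sec == basetime.tv_sec - gd_time_seconds,
	 * per set_timeofday() below.
	 */
}
#endif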

SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");
SYSCTL_STRUCT(_kern, OID_AUTO, basetime, CTLFLAG_RD,
    &basetime, timeval, "System basetime");

static void hardclock(systimer_t info, struct intrframe *frame);
static void statclock(systimer_t info, struct intrframe *frame);
static void schedclock(systimer_t info, struct intrframe *frame);

int	ticks;			/* system master ticks at hz */
int64_t	nsec_adj;		/* ntpd per-tick adjustment in nsec << 32 */
int64_t	nsec_acc;		/* accumulator */

/*
 * Finish initializing clock frequencies and start all clocks running.
 */
/* ARGSUSED*/
static void
initclocks(void *dummy)
{
	cpu_initclocks();
#ifdef DEVICE_POLLING
	init_device_poll();
#endif
	/*psratio = profhz / stathz;*/
	initclocks_pcpu();
}

/*
 * Called on a per-cpu basis
 */
void
initclocks_pcpu(void)
{
	struct globaldata *gd = mycpu;

	crit_enter();
	if (gd->gd_cpuid == 0) {
		gd->gd_time_seconds = 1;
		gd->gd_cpuclock_base = cputimer_count();
	} else {
		/* XXX */
		gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds;
		gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base;
	}
	systimer_init_periodic(&gd->gd_hardclock, hardclock, NULL, hz);
	systimer_init_periodic(&gd->gd_statclock, statclock, NULL, stathz);
	/* XXX correct the frequency for scheduler / estcpu tests */
	systimer_init_periodic(&gd->gd_schedclock, schedclock,
	    NULL, ESTCPUFREQ);
	crit_exit();
}

/*
 * This sets the current real time of day.  Timespecs are in seconds and
 * nanoseconds.  We do not mess with gd_time_seconds and gd_cpuclock_base,
 * instead we adjust basetime so basetime + gd_* results in the current
 * time of day.  This way the gd_* fields are guaranteed to represent
 * a monotonically increasing 'uptime' value.
 */
void
set_timeofday(struct timespec *ts)
{
	struct timespec ts2;

	/*
	 * XXX SMP / non-atomic basetime updates
	 */
	crit_enter();
	nanouptime(&ts2);
	basetime.tv_sec = ts->tv_sec - ts2.tv_sec;
	basetime.tv_nsec = ts->tv_nsec - ts2.tv_nsec;
	if (basetime.tv_nsec < 0) {
		basetime.tv_nsec += 1000000000;
		--basetime.tv_sec;
	}
	boottime.tv_sec = basetime.tv_sec - mycpu->gd_time_seconds;
	timedelta = 0;
	crit_exit();
}

/*
 * Each cpu has its own hardclock, but we only increment ticks and softticks
 * on cpu #0.
 *
 * NOTE! systimer! the MP lock might not be held here.  We can only safely
 * manipulate objects owned by the current cpu.
 */
static void
hardclock(systimer_t info, struct intrframe *frame)
{
	sysclock_t cputicks;
	struct proc *p;
	struct pstats *pstats;
	struct globaldata *gd = mycpu;

	/*
	 * Realtime updates are per-cpu.  Note that timer corrections as
	 * returned by microtime() and friends make an additional adjustment
	 * using a system-wide 'basetime', but the running time is always
	 * taken from the per-cpu globaldata area.  Since the same clock
	 * is distributing (XXX SMP) to all cpus, the per-cpu timebases
	 * stay in synch.
	 *
	 * Note that we never allow info->time (aka gd->gd_hardclock.time)
	 * to reverse index gd_cpuclock_base.
	 */
	cputicks = info->time - gd->gd_cpuclock_base;
	if (cputicks > cputimer_freq) {
		++gd->gd_time_seconds;
		gd->gd_cpuclock_base += cputimer_freq;
	}

	/*
	 * The system-wide ticks and softticks are only updated by cpu #0.
	 * Callwheel actions are also (at the moment) only handled by cpu #0.
	 * Finally, we also do NTP related timedelta/tickdelta adjustments
	 * by adjusting basetime.
	 */
	if (gd->gd_cpuid == 0) {
		struct timespec nts;
		int leap;

		++ticks;

#ifdef DEVICE_POLLING
		hardclock_device_poll();	/* mpsafe, short and quick */
#endif /* DEVICE_POLLING */

		if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
			setsoftclock();
		} else if (softticks + 1 == ticks) {
			++softticks;
		}

#if 0
		if (tco->tc_poll_pps)
			tco->tc_poll_pps(tco);
#endif
		/*
		 * Apply adjtime corrections.  At the moment only do this if
		 * we can get the MP lock to interlock with adjtime's
		 * modification of these variables.  Note that basetime
		 * adjustments are not MP safe either XXX.
		 */
		if (timedelta != 0 && try_mplock()) {
			basetime.tv_nsec += tickdelta * 1000;
			if (basetime.tv_nsec >= 1000000000) {
				basetime.tv_nsec -= 1000000000;
				++basetime.tv_sec;
			} else if (basetime.tv_nsec < 0) {
				basetime.tv_nsec += 1000000000;
				--basetime.tv_sec;
			}
			timedelta -= tickdelta;
			rel_mplock();
		}

		/*
		 * Apply per-tick compensation.  nsec_adj adjusts for both
		 * offset and frequency, and could be negative.
		 */
		if (nsec_adj != 0 && try_mplock()) {
			nsec_acc += nsec_adj;
			if (nsec_acc >= 0x100000000LL) {
				basetime.tv_nsec += nsec_acc >> 32;
				nsec_acc = (nsec_acc & 0xFFFFFFFFLL);
			} else if (nsec_acc <= -0x100000000LL) {
				basetime.tv_nsec -= -nsec_acc >> 32;
				nsec_acc = -(-nsec_acc & 0xFFFFFFFFLL);
			}
			if (basetime.tv_nsec >= 1000000000) {
				basetime.tv_nsec -= 1000000000;
				++basetime.tv_sec;
			} else if (basetime.tv_nsec < 0) {
				basetime.tv_nsec += 1000000000;
				--basetime.tv_sec;
			}
			rel_mplock();
		}

		/*
		 * If the realtime-adjusted seconds hand rolls over then tell
		 * ntp_update_second() what we did in the last second so it
		 * can calculate what to do in the next second.  It may also
		 * add or subtract a leap second.
		 */
		getnanotime(&nts);
		if (time_second != nts.tv_sec) {
			leap = ntp_update_second(time_second, &nsec_adj);
			basetime.tv_sec += leap;
			time_second = nts.tv_sec + leap;
			nsec_adj /= hz;
		}
	}

	/*
	 * ITimer handling is per-tick, per-cpu.  I don't think psignal()
	 * is mpsafe on curproc, so XXX get the mplock.
	 */
	if ((p = curproc) != NULL && try_mplock()) {
		pstats = p->p_stats;
		if (frame && CLKF_USERMODE(frame) &&
		    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
		rel_mplock();
	}
	setdelayed();
}
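
/*
 * Worked example of the 32.32 fixed-point accumulation used for nsec_adj
 * above (illustration only; the math is performed inline in hardclock()).
 * If ntpd asks for a +5000 nsec/second slew, ntp_update_second() returns
 * 5000 << 32 and the division by hz (say 100) leaves:
 *
 *	nsec_adj = (5000 << 32) / 100 = 50 nsec per tick, in 32.32 form
 *
 * Each tick adds nsec_adj to nsec_acc; whenever the integer part
 * (nsec_acc >> 32) reaches at least one nanosecond it is folded into
 * basetime and only the fractional 32 bits are carried forward.
 */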

/*
 * The statistics clock typically runs at a 125Hz rate, and is intended
 * to be frequency offset from the hardclock (typ 100Hz).  It is per-cpu.
 *
 * NOTE! systimer! the MP lock might not be held here.  We can only safely
 * manipulate objects owned by the current cpu.
 *
 * The stats clock is responsible for grabbing a profiling sample.
 * Most of the statistics are only used by user-level statistics programs.
 * The main exceptions are p->p_uticks, p->p_sticks, p->p_iticks, and
 * p->p_estcpu.
 *
 * Like the other clocks, the stat clock is called from what is effectively
 * a fast interrupt, so the context should be the thread/process that got
 * interrupted.
 */
static void
statclock(systimer_t info, struct intrframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	int i;
#endif
	thread_t td;
	struct proc *p;
	int bump;
	struct timeval tv;
	struct timeval *stv;

	/*
	 * How big was our timeslice relative to the last time?
	 */
	microuptime(&tv);	/* mpsafe */
	stv = &mycpu->gd_stattv;
	if (stv->tv_sec == 0) {
		bump = 1;
	} else {
		bump = tv.tv_usec - stv->tv_usec +
			(tv.tv_sec - stv->tv_sec) * 1000000;
		if (bump < 0)
			bump = 0;
		if (bump > 1000000)
			bump = 1000000;
	}
	*stv = tv;

	td = curthread;
	p = td->td_proc;

	if (frame && CLKF_USERMODE(frame)) {
		/*
		 * Came from userland, handle user time and deal with
		 * possible process.
		 */
		if (p && (p->p_flag & P_PROFIL))
			addupc_intr(p, CLKF_PC(frame), 1);
		td->td_uticks += bump;

		/*
		 * Charge the time as appropriate
		 */
		if (p && p->p_nice > NZERO)
			cp_time[CP_NICE] += bump;
		else
			cp_time[CP_USER] += bump;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON && frame) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 *
		 * XXX assume system if frame is NULL.  A NULL frame
		 * can occur if ipi processing is done from an splx().
		 */
		if (frame && CLKF_INTR(frame)) {
			td->td_iticks += bump;
			cp_time[CP_INTR] += bump;
		} else {
			td->td_sticks += bump;
			if (td == &mycpu->gd_idlethread)
				cp_time[CP_IDLE] += bump;
			else
				cp_time[CP_SYS] += bump;
		}
	}
}
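
/*
 * Note on the 'bump' measurement above: with stathz = 125 the nominal
 * interval between statclock firings is 1000000 / 125 = 8000 usec, so
 * cp_time[] accumulates roughly 8000 per sample.  The clamp to
 * [0, 1000000] guards against a stalled or stepped clock producing
 * nonsense deltas.
 */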

/*
 * The scheduler clock typically runs at a 20Hz rate.  NOTE! systimer,
 * the MP lock might not be held.  We can safely manipulate parts of curproc
 * but that's about it.
 */
static void
schedclock(systimer_t info, struct intrframe *frame)
{
	struct proc *p;
	struct pstats *pstats;
	struct rusage *ru;
	struct vmspace *vm;
	long rss;

	schedulerclock(NULL);	/* mpsafe */
	if ((p = curproc) != NULL) {
		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += pgtok(vm->vm_tsize);
			ru->ru_idrss += pgtok(vm->vm_dsize);
			ru->ru_isrss += pgtok(vm->vm_ssize);
			rss = pgtok(vmspace_resident_count(vm));
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

468/*
a94976ad
MD
469 * Compute number of ticks for the specified amount of time. The
470 * return value is intended to be used in a clock interrupt timed
471 * operation and guarenteed to meet or exceed the requested time.
472 * If the representation overflows, return INT_MAX. The minimum return
473 * value is 1 ticks and the function will average the calculation up.
474 * If any value greater then 0 microseconds is supplied, a value
475 * of at least 2 will be returned to ensure that a near-term clock
476 * interrupt does not cause the timeout to occur (degenerately) early.
477 *
478 * Note that limit checks must take into account microseconds, which is
479 * done simply by using the smaller signed long maximum instead of
480 * the unsigned long maximum.
481 *
482 * If ints have 32 bits, then the maximum value for any timeout in
483 * 10ms ticks is 248 days.
984263bc
MD
484 */
485int
a94976ad 486tvtohz_high(struct timeval *tv)
984263bc 487{
a94976ad 488 int ticks;
1fd87d54 489 long sec, usec;
984263bc 490
984263bc
MD
491 sec = tv->tv_sec;
492 usec = tv->tv_usec;
493 if (usec < 0) {
494 sec--;
495 usec += 1000000;
496 }
497 if (sec < 0) {
498#ifdef DIAGNOSTIC
499 if (usec > 0) {
500 sec++;
501 usec -= 1000000;
502 }
503 printf("tvotohz: negative time difference %ld sec %ld usec\n",
504 sec, usec);
505#endif
506 ticks = 1;
a94976ad
MD
507 } else if (sec <= INT_MAX / hz) {
508 ticks = (int)(sec * hz +
509 ((u_long)usec + (tick - 1)) / tick) + 1;
510 } else {
511 ticks = INT_MAX;
512 }
513 return (ticks);
514}
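
/*
 * Worked example for tvtohz_high(), assuming hz = 100 (tick = 10000 usec):
 * a timeval of 25000 usec (2.5 ticks) yields (25000 + 9999) / 10000 + 1 = 4.
 * The fractional tick is rounded up to 3 and one extra tick is added so a
 * near-term clock interrupt cannot expire the timeout early.
 */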

/*
 * Compute the number of ticks for the specified amount of time, erroring on
 * the side of it being too low to ensure that sleeping the returned number
 * of ticks will not result in a late return.
 *
 * The supplied timeval may not be negative and should be normalized.  A
 * return value of 0 is possible if the timeval converts to less than
 * 1 tick.
 *
 * If ints have 32 bits, then the maximum value for any timeout in
 * 10ms ticks is 248 days.
 */
int
tvtohz_low(struct timeval *tv)
{
	int ticks;
	long sec;

	sec = tv->tv_sec;
	if (sec <= INT_MAX / hz)
		ticks = (int)(sec * hz + (u_long)tv->tv_usec / tick);
	else
		ticks = INT_MAX;
	return (ticks);
}
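
/*
 * The same 25000 usec example with hz = 100 gives 25000 / 10000 = 2 ticks
 * from tvtohz_low(): the conversion truncates, so a sleep ends on time or
 * early, whereas tvtohz_high() above returns 4 and never ends early.
 */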

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
#if 0	/* XXX */
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
#endif
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{
	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
#if 0	/* XXX */
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = 1;
			setstatclockrate(stathz);
			splx(s);
		}
#endif
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

/*
 * We have eight functions for looking at the clock, four for
 * microseconds and four for nanoseconds.  For each there is a fast
 * but less precise version "get{nano|micro}[up]time" which will
 * return a time which is up to 1/HZ previous to the call, whereas
 * the raw version "{nano|micro}[up]time" will return a timestamp
 * which is as precise as possible.  The "up" variants return the
 * time relative to system boot; these are well suited for time
 * interval measurements.
 *
 * Each cpu independently maintains the current time of day, so all
 * we need to do to protect ourselves from changes is to do a loop
 * check on the seconds field changing out from under us.
 */
void
getmicrouptime(struct timeval *tvp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tvp->tv_sec = gd->gd_time_seconds;
		delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
	} while (tvp->tv_sec != gd->gd_time_seconds);
	tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;
	if (tvp->tv_usec >= 1000000) {
		tvp->tv_usec -= 1000000;
		++tvp->tv_sec;
	}
}

void
getnanouptime(struct timespec *tsp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tsp->tv_sec = gd->gd_time_seconds;
		delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
	} while (tsp->tv_sec != gd->gd_time_seconds);
	tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
	if (tsp->tv_nsec >= 1000000000) {
		tsp->tv_nsec -= 1000000000;
		++tsp->tv_sec;
	}
}

void
microuptime(struct timeval *tvp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tvp->tv_sec = gd->gd_time_seconds;
		delta = cputimer_count() - gd->gd_cpuclock_base;
	} while (tvp->tv_sec != gd->gd_time_seconds);
	tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;
	if (tvp->tv_usec >= 1000000) {
		tvp->tv_usec -= 1000000;
		++tvp->tv_sec;
	}
}

void
nanouptime(struct timespec *tsp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tsp->tv_sec = gd->gd_time_seconds;
		delta = cputimer_count() - gd->gd_cpuclock_base;
	} while (tsp->tv_sec != gd->gd_time_seconds);
	tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
	if (tsp->tv_nsec >= 1000000000) {
		tsp->tv_nsec -= 1000000000;
		++tsp->tv_sec;
	}
}
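
/*
 * Usage sketch (illustration only): measuring a kernel time interval with
 * the monotonic 'up' clocks above.  do_something and example_interval are
 * hypothetical names.
 */
#if 0
static void
example_interval(void)
{
	struct timespec t1, t2;

	nanouptime(&t1);
	do_something();			/* hypothetical work being timed */
	nanouptime(&t2);
	timespecsub(&t2, &t1);		/* t2 now holds the elapsed time */
}
#endif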

/*
 * realtime routines
 */

void
getmicrotime(struct timeval *tvp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tvp->tv_sec = gd->gd_time_seconds;
		delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
	} while (tvp->tv_sec != gd->gd_time_seconds);
	tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;

	tvp->tv_sec += basetime.tv_sec;
	tvp->tv_usec += basetime.tv_nsec / 1000;
	while (tvp->tv_usec >= 1000000) {
		tvp->tv_usec -= 1000000;
		++tvp->tv_sec;
	}
}

void
getnanotime(struct timespec *tsp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tsp->tv_sec = gd->gd_time_seconds;
		delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
	} while (tsp->tv_sec != gd->gd_time_seconds);
	tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;

	tsp->tv_sec += basetime.tv_sec;
	tsp->tv_nsec += basetime.tv_nsec;
	while (tsp->tv_nsec >= 1000000000) {
		tsp->tv_nsec -= 1000000000;
		++tsp->tv_sec;
	}
}

void
microtime(struct timeval *tvp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tvp->tv_sec = gd->gd_time_seconds;
		delta = cputimer_count() - gd->gd_cpuclock_base;
	} while (tvp->tv_sec != gd->gd_time_seconds);
	tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;

	tvp->tv_sec += basetime.tv_sec;
	tvp->tv_usec += basetime.tv_nsec / 1000;
	while (tvp->tv_usec >= 1000000) {
		tvp->tv_usec -= 1000000;
		++tvp->tv_sec;
	}
}

void
nanotime(struct timespec *tsp)
{
	struct globaldata *gd = mycpu;
	sysclock_t delta;

	do {
		tsp->tv_sec = gd->gd_time_seconds;
		delta = cputimer_count() - gd->gd_cpuclock_base;
	} while (tsp->tv_sec != gd->gd_time_seconds);
	tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;

	tsp->tv_sec += basetime.tv_sec;
	tsp->tv_nsec += basetime.tv_nsec;
	while (tsp->tv_nsec >= 1000000000) {
		tsp->tv_nsec -= 1000000000;
		++tsp->tv_sec;
	}
}

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		fapi->pps_info_buf = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = kapi->edge;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOTTY);
	}
}

void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}
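
/*
 * Driver usage sketch (illustration only; the xx_* names are hypothetical).
 * A serial or parallel port driver typically embeds a struct pps_state,
 * advertises its capture capabilities before calling pps_init(), and then
 * reports each captured edge to pps_event() with a timestamp taken as
 * early as possible in its interrupt handler.
 */
#if 0
static struct pps_state xx_pps;		/* hypothetical driver softc field */

static void
xx_attach(void)
{
	xx_pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR;
	pps_init(&xx_pps);
}

static void
xx_intr(void)
{
	sysclock_t count = cputimer_count();	/* timestamp the edge ASAP */

	pps_event(&xx_pps, count, PPS_CAPTUREASSERT);
}
#endif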

void
pps_event(struct pps_state *pps, sysclock_t count, int event)
{
	struct globaldata *gd;
	struct timespec *tsp;
	struct timespec *osp;
	struct timespec ts;
	sysclock_t *pcount;
#ifdef PPS_SYNC
	sysclock_t tcount;
#endif
	sysclock_t delta;
	pps_seq_t *pseq;
	int foff;
	int fhard;

	gd = mycpu;

	/* Things would be easier with arrays... */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/* Nothing really happened */
	if (*pcount == count)
		return;

	*pcount = count;

	do {
		ts.tv_sec = gd->gd_time_seconds;
		delta = count - gd->gd_cpuclock_base;
	} while (ts.tv_sec != gd->gd_time_seconds);
	if (delta > cputimer_freq) {
		ts.tv_sec += delta / cputimer_freq;
		delta %= cputimer_freq;
	}
	ts.tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
	ts.tv_sec += basetime.tv_sec;
	ts.tv_nsec += basetime.tv_nsec;
	while (ts.tv_nsec >= 1000000000) {
		ts.tv_nsec -= 1000000000;
		++ts.tv_sec;
	}

	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		/* magic, at its best... */
		tcount = count - pps->ppscount[2];
		pps->ppscount[2] = count;
		delta = (cputimer_freq64_nsec * tcount) >> 32;
		hardpps(tsp, delta);
	}
#endif
}