This commit represents a major revamping of the clock interrupt and timebase
sys/kern/kern_clock.c
/*-
 * Copyright (c) 2004, Matthew Dillon <dillon@backplane.com>
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $
 * $DragonFly: src/sys/kern/kern_clock.c,v 1.15 2004/01/30 05:42:17 dillon Exp $
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <sys/timepps.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/limits.h>
#include <machine/smp.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef DEVICE_POLLING
extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/*
 * Some of these don't belong here, but it's easiest to concentrate them.
 * Note that cp_time[] counts in microseconds, but most userland programs
 * just compare relative times against the total by delta.
 */
long cp_time[CPUSTATES];

SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
    "LU", "CPU time statistics");

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * boottime is used to calculate the 'real' uptime.  Do not confuse this with
 * microuptime().  microtime() is not drift compensated.  The real uptime
 * with compensation is nanotime() - boottime.
 *
 * basetime is used to calculate the compensated real time of day.  Chunky
 * changes to the time, aka settimeofday(), are made by modifying basetime.
 *
 * The gd_time_seconds and gd_cpuclock_base fields remain fairly monotonic.
 * Slight adjustments to gd_cpuclock_base are made to phase-lock it to
 * the real time.
 */
struct timespec boottime;       /* boot time (realtime) for reference only */
struct timespec basetime;       /* base time adjusts uptime -> realtime */
time_t time_second;             /* read-only 'passive' uptime in seconds */

SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");
SYSCTL_STRUCT(_kern, OID_AUTO, basetime, CTLFLAG_RD,
    &basetime, timeval, "System basetime");

static void hardclock(systimer_t info, struct intrframe *frame);
static void statclock(systimer_t info, struct intrframe *frame);
static void schedclock(systimer_t info, struct intrframe *frame);

int     ticks;                  /* system master ticks at hz */
int64_t nsec_adj;               /* ntpd per-tick adjustment in nsec << 32 */
int64_t nsec_acc;               /* accumulator */
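
/*
 * Illustrative example (not from the original source): nsec_adj is a
 * 32.32 fixed point quantity.  If ntpd asks for a +5000ns correction
 * over the next second at hz = 100, the per-tick adjustment is
 *
 *      nsec_adj = (5000LL << 32) / 100         (nsec << 32 per tick)
 *
 * Each hardclock does nsec_acc += nsec_adj, and once |nsec_acc|
 * reaches 1 << 32 the whole nanoseconds (nsec_acc >> 32) are folded
 * into basetime, leaving the fractional residue in nsec_acc.
 */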
/*
 * Finish initializing clock frequencies and start all clocks running.
 */
/* ARGSUSED*/
static void
initclocks(void *dummy)
{
        cpu_initclocks();
#ifdef DEVICE_POLLING
        init_device_poll();
#endif
        /*psratio = profhz / stathz;*/
        initclocks_pcpu();
}

/*
 * Called on a per-cpu basis
 */
void
initclocks_pcpu(void)
{
        struct globaldata *gd = mycpu;

        crit_enter();
        if (gd->gd_cpuid == 0) {
                gd->gd_time_seconds = 1;
                gd->gd_cpuclock_base = cputimer_count();
        } else {
                /* XXX */
                gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds;
                gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base;
        }
        systimer_init_periodic(&gd->gd_hardclock, hardclock, NULL, hz);
        systimer_init_periodic(&gd->gd_statclock, statclock, NULL, stathz);
        /* XXX correct the frequency for scheduler / estcpu tests */
        systimer_init_periodic(&gd->gd_schedclock, schedclock, NULL, 10);
        crit_exit();
}

/*
 * This sets the current real time of day.  Timespecs are in seconds and
 * nanoseconds.  We do not mess with gd_time_seconds and gd_cpuclock_base;
 * instead we adjust basetime so basetime + gd_* results in the current
 * time of day.  This way the gd_* fields are guaranteed to represent
 * a monotonically increasing 'uptime' value.
 */
void
set_timeofday(struct timespec *ts)
{
        struct timespec ts2;

        /*
         * XXX SMP / non-atomic basetime updates
         */
        crit_enter();
        nanouptime(&ts2);
        basetime.tv_sec = ts->tv_sec - ts2.tv_sec;
        basetime.tv_nsec = ts->tv_nsec - ts2.tv_nsec;
        if (basetime.tv_nsec < 0) {
                basetime.tv_nsec += 1000000000;
                --basetime.tv_sec;
        }
        if (boottime.tv_sec == 0)
                boottime = basetime;
        timedelta = 0;
        crit_exit();
}

/*
 * Each cpu has its own hardclock, but we only increment ticks and softticks
 * on cpu #0.
 *
 * NOTE! systimer! the MP lock might not be held here.  We can only safely
 * manipulate objects owned by the current cpu.
 */
static void
hardclock(systimer_t info, struct intrframe *frame)
{
        sysclock_t cputicks;
        struct proc *p;
        struct pstats *pstats;
        struct globaldata *gd = mycpu;

        /*
         * Realtime updates are per-cpu.  Note that timer corrections as
         * returned by microtime() and friends make an additional adjustment
         * using a system-wide 'basetime', but the running time is always
         * taken from the per-cpu globaldata area.  Since the same clock
         * is distributed (XXX SMP) to all cpus, the per-cpu timebases
         * stay in synch.
         *
         * Note that we never allow info->time (aka gd->gd_hardclock.time)
         * to reverse index gd_cpuclock_base.
         */
        cputicks = info->time - gd->gd_cpuclock_base;
        if (cputicks > cputimer_freq) {
                ++gd->gd_time_seconds;
                gd->gd_cpuclock_base += cputimer_freq;
        }

        /*
         * The system-wide ticks and softticks are only updated by cpu #0.
         * Callwheel actions are also (at the moment) only handled by cpu #0.
         * Finally, we also do NTP related timedelta/tickdelta adjustments
         * by adjusting basetime.
         */
        if (gd->gd_cpuid == 0) {
                struct timespec nts;
                int leap;

                ++ticks;

#ifdef DEVICE_POLLING
                hardclock_device_poll();        /* mpsafe, short and quick */
#endif /* DEVICE_POLLING */

                if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
                        setsoftclock();
                } else if (softticks + 1 == ticks) {
                        ++softticks;
                }

#if 0
                if (tco->tc_poll_pps)
                        tco->tc_poll_pps(tco);
#endif
                /*
                 * Apply adjtime corrections.  At the moment only do this if
                 * we can get the MP lock to interlock with adjtime's
                 * modification of these variables.  Note that basetime
                 * adjustments are not MP safe either XXX.
                 */
                if (timedelta != 0 && try_mplock()) {
                        basetime.tv_nsec += tickdelta * 1000;
                        if (basetime.tv_nsec >= 1000000000) {
                                basetime.tv_nsec -= 1000000000;
                                ++basetime.tv_sec;
                        } else if (basetime.tv_nsec < 0) {
                                basetime.tv_nsec += 1000000000;
                                --basetime.tv_sec;
                        }
                        timedelta -= tickdelta;
                        rel_mplock();
                }

                /*
                 * Apply per-tick compensation.  nsec_adj adjusts for both
                 * offset and frequency, and could be negative.
                 */
                if (nsec_adj != 0 && try_mplock()) {
                        nsec_acc += nsec_adj;
                        if (nsec_acc >= 0x100000000LL) {
                                basetime.tv_nsec += nsec_acc >> 32;
                                nsec_acc = (nsec_acc & 0xFFFFFFFFLL);
                        } else if (nsec_acc <= -0x100000000LL) {
                                basetime.tv_nsec -= -nsec_acc >> 32;
                                nsec_acc = -(-nsec_acc & 0xFFFFFFFFLL);
                        }
                        if (basetime.tv_nsec >= 1000000000) {
                                basetime.tv_nsec -= 1000000000;
                                ++basetime.tv_sec;
                        } else if (basetime.tv_nsec < 0) {
                                basetime.tv_nsec += 1000000000;
                                --basetime.tv_sec;
                        }
                        rel_mplock();
                }

                /*
                 * If the realtime-adjusted seconds hand rolls over then tell
                 * ntp_update_second() what we did in the last second so it
                 * can calculate what to do in the next second.  It may also
                 * add or subtract a leap second.
                 */
                getnanotime(&nts);
                if (time_second != nts.tv_sec) {
                        leap = ntp_update_second(time_second, &nsec_adj);
                        basetime.tv_sec += leap;
                        time_second = nts.tv_sec + leap;
                        nsec_adj /= hz;
                }
        }

        /*
         * ITimer handling is per-tick, per-cpu.  I don't think psignal()
         * is mpsafe on curproc, so XXX get the mplock.
         */
        if ((p = curproc) != NULL && try_mplock()) {
                pstats = p->p_stats;
                if (frame && CLKF_USERMODE(frame) &&
                    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
                if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);
                rel_mplock();
        }
}

/*
 * The statistics clock typically runs at a 125Hz rate, and is intended
 * to be frequency offset from the hardclock (typ 100Hz).  It is per-cpu.
 *
 * NOTE! systimer! the MP lock might not be held here.  We can only safely
 * manipulate objects owned by the current cpu.
 *
 * The stats clock is responsible for grabbing a profiling sample.
 * Most of the statistics are only used by user-level statistics programs.
 * The main exceptions are p->p_uticks, p->p_sticks, p->p_iticks, and
 * p->p_estcpu.
 *
 * Like the other clocks, the stat clock is called from what is effectively
 * a fast interrupt, so the context should be the thread/process that got
 * interrupted.
 */
static void
statclock(systimer_t info, struct intrframe *frame)
{
#ifdef GPROF
        struct gmonparam *g;
        int i;
#endif
        thread_t td;
        struct proc *p;
        int bump;
        struct timeval tv;
        struct timeval *stv;

        /*
         * How big was our timeslice relative to the last time?
         */
        microuptime(&tv);       /* mpsafe */
        stv = &mycpu->gd_stattv;
        if (stv->tv_sec == 0) {
                bump = 1;
        } else {
                bump = tv.tv_usec - stv->tv_usec +
                        (tv.tv_sec - stv->tv_sec) * 1000000;
                if (bump < 0)
                        bump = 0;
                if (bump > 1000000)
                        bump = 1000000;
        }
        *stv = tv;

        td = curthread;
        p = td->td_proc;

        if (frame && CLKF_USERMODE(frame)) {
                /*
                 * Came from userland, handle user time and deal with
                 * possible process.
                 */
                if (p && (p->p_flag & P_PROFIL))
                        addupc_intr(p, CLKF_PC(frame), 1);
                td->td_uticks += bump;

                /*
                 * Charge the time as appropriate
                 */
                if (p && p->p_nice > NZERO)
                        cp_time[CP_NICE] += bump;
                else
                        cp_time[CP_USER] += bump;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON && frame) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 *
                 * XXX assume system if frame is NULL.  A NULL frame
                 * can occur if ipi processing is done from an splx().
                 */
                if (frame && CLKF_INTR(frame))
                        td->td_iticks += bump;
                else
                        td->td_sticks += bump;

                if (frame && CLKF_INTR(frame)) {
                        cp_time[CP_INTR] += bump;
                } else {
                        if (td == &mycpu->gd_idlethread)
                                cp_time[CP_IDLE] += bump;
                        else
                                cp_time[CP_SYS] += bump;
                }
        }
}
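
/*
 * Illustrative note (not from the original source): statclock charges
 * the microseconds actually elapsed since its previous firing rather
 * than a fixed quantum.  A statclock that fires 9000us after the last
 * one credits bump = 9000 to exactly one of CP_USER, CP_NICE, CP_SYS,
 * CP_INTR or CP_IDLE, so the cp_time[] buckets sum (approximately)
 * to elapsed cpu time.
 */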

/*
 * The scheduler clock typically runs at a 10Hz rate.  NOTE! systimer,
 * the MP lock might not be held.  We can safely manipulate parts of curproc
 * but that's about it.
 */
static void
schedclock(systimer_t info, struct intrframe *frame)
{
        struct proc *p;
        struct pstats *pstats;
        struct rusage *ru;
        struct vmspace *vm;
        long rss;

        schedulerclock(NULL);   /* mpsafe */
        if ((p = curproc) != NULL) {
                /* Update resource usage integrals and maximums. */
                if ((pstats = p->p_stats) != NULL &&
                    (ru = &pstats->p_ru) != NULL &&
                    (vm = p->p_vmspace) != NULL) {
                        ru->ru_ixrss += pgtok(vm->vm_tsize);
                        ru->ru_idrss += pgtok(vm->vm_dsize);
                        ru->ru_isrss += pgtok(vm->vm_ssize);
                        rss = pgtok(vmspace_resident_count(vm));
                        if (ru->ru_maxrss < rss)
                                ru->ru_maxrss = rss;
                }
        }
}

/*
 * Compute number of ticks for the specified amount of time.  The
 * return value is intended to be used in a clock interrupt timed
 * operation and guaranteed to meet or exceed the requested time.
 * If the representation overflows, return INT_MAX.  The minimum return
 * value is 1 tick and the function will round the calculation up.
 * If any value greater than 0 microseconds is supplied, a value
 * of at least 2 will be returned to ensure that a near-term clock
 * interrupt does not cause the timeout to occur (degenerately) early.
 *
 * Note that limit checks must take into account microseconds, which is
 * done simply by using the smaller signed long maximum instead of
 * the unsigned long maximum.
 *
 * If ints have 32 bits, then the maximum value for any timeout in
 * 10ms ticks is 248 days.
 */
int
tvtohz_high(struct timeval *tv)
{
        int ticks;
        long sec, usec;

        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                if (usec > 0) {
                        sec++;
                        usec -= 1000000;
                }
                printf("tvtohz_high: negative time difference %ld sec %ld usec\n",
                       sec, usec);
#endif
                ticks = 1;
        } else if (sec <= INT_MAX / hz) {
                ticks = (int)(sec * hz +
                        ((u_long)usec + (tick - 1)) / tick) + 1;
        } else {
                ticks = INT_MAX;
        }
        return (ticks);
}
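
/*
 * Worked example (illustrative, not from the original source): with
 * hz = 100 (tick = 10000us), a timeval of 1500us yields
 *
 *      ticks = 0 * 100 + (1500 + 9999) / 10000 + 1 = 2
 *
 * guaranteeing the timeout is not cut short by a clock interrupt that
 * fires immediately after the caller arms it.
 */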

/*
 * Compute number of ticks for the specified amount of time, erroring on
 * the side of it being too low to ensure that sleeping the returned number
 * of ticks will not result in a late return.
 *
 * The supplied timeval may not be negative and should be normalized.  A
 * return value of 0 is possible if the timeval converts to less than
 * 1 tick.
 *
 * If ints have 32 bits, then the maximum value for any timeout in
 * 10ms ticks is 248 days.
 */
int
tvtohz_low(struct timeval *tv)
{
        int ticks;
        long sec;

        sec = tv->tv_sec;
        if (sec <= INT_MAX / hz)
                ticks = (int)(sec * hz + (u_long)tv->tv_usec / tick);
        else
                ticks = INT_MAX;
        return (ticks);
}
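
/*
 * Worked example (illustrative, not from the original source): the same
 * 1500us timeval at hz = 100 truncates instead of rounding up:
 *
 *      ticks = 0 * 100 + 1500 / 10000 = 0
 *
 * so tvtohz_low() may legitimately return 0 where tvtohz_high()
 * returned 2.
 */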

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{
        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
#if 0   /* XXX */
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
#endif
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{
        if (p->p_flag & P_PROFIL) {
                p->p_flag &= ~P_PROFIL;
#if 0   /* XXX */
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
#endif
        }
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.tickadj = tickadj;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

/*
 * We have eight functions for looking at the clock, four for
 * microseconds and four for nanoseconds.  For each there is a fast
 * but less precise version "get{nano|micro}[up]time" which will
 * return a time which is up to 1/HZ previous to the call, whereas
 * the raw version "{nano|micro}[up]time" will return a timestamp
 * which is as precise as possible.  The "up" variants return the
 * time relative to system boot; these are well suited for time
 * interval measurements.
 *
 * Each cpu independently maintains the current time of day, so all
 * we need to do to protect ourselves from changes is to do a loop
 * check on the seconds field changing out from under us.
 */
void
getmicrouptime(struct timeval *tvp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tvp->tv_sec = gd->gd_time_seconds;
                delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
        } while (tvp->tv_sec != gd->gd_time_seconds);
        tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;
        if (tvp->tv_usec >= 1000000) {
                tvp->tv_usec -= 1000000;
                ++tvp->tv_sec;
        }
}

void
getnanouptime(struct timespec *tsp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tsp->tv_sec = gd->gd_time_seconds;
                delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
        } while (tsp->tv_sec != gd->gd_time_seconds);
        tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
        if (tsp->tv_nsec >= 1000000000) {
                tsp->tv_nsec -= 1000000000;
                ++tsp->tv_sec;
        }
}

void
microuptime(struct timeval *tvp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tvp->tv_sec = gd->gd_time_seconds;
                delta = cputimer_count() - gd->gd_cpuclock_base;
        } while (tvp->tv_sec != gd->gd_time_seconds);
        tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;
        if (tvp->tv_usec >= 1000000) {
                tvp->tv_usec -= 1000000;
                ++tvp->tv_sec;
        }
}

void
nanouptime(struct timespec *tsp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tsp->tv_sec = gd->gd_time_seconds;
                delta = cputimer_count() - gd->gd_cpuclock_base;
        } while (tsp->tv_sec != gd->gd_time_seconds);
        tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
        if (tsp->tv_nsec >= 1000000000) {
                tsp->tv_nsec -= 1000000000;
                ++tsp->tv_sec;
        }
}
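
/*
 * Usage sketch (illustrative, not from the original source): the "up"
 * variants are monotonic and unaffected by settimeofday(), which makes
 * them the right tool for interval measurement.  Using the classic
 * two-argument BSD timespecsub() macro:
 *
 *      struct timespec t1, t2;
 *
 *      nanouptime(&t1);
 *      ... work to be timed ...
 *      nanouptime(&t2);
 *      timespecsub(&t2, &t1);          (t2 now holds the elapsed time)
 */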

/*
 * realtime routines
 */

void
getmicrotime(struct timeval *tvp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tvp->tv_sec = gd->gd_time_seconds;
                delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
        } while (tvp->tv_sec != gd->gd_time_seconds);
        tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;

        tvp->tv_sec += basetime.tv_sec;
        tvp->tv_usec += basetime.tv_nsec / 1000;
        while (tvp->tv_usec >= 1000000) {
                tvp->tv_usec -= 1000000;
                ++tvp->tv_sec;
        }
}

void
getnanotime(struct timespec *tsp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tsp->tv_sec = gd->gd_time_seconds;
                delta = gd->gd_hardclock.time - gd->gd_cpuclock_base;
        } while (tsp->tv_sec != gd->gd_time_seconds);
        tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;

        tsp->tv_sec += basetime.tv_sec;
        tsp->tv_nsec += basetime.tv_nsec;
        while (tsp->tv_nsec >= 1000000000) {
                tsp->tv_nsec -= 1000000000;
                ++tsp->tv_sec;
        }
}

void
microtime(struct timeval *tvp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tvp->tv_sec = gd->gd_time_seconds;
                delta = cputimer_count() - gd->gd_cpuclock_base;
        } while (tvp->tv_sec != gd->gd_time_seconds);
        tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32;

        tvp->tv_sec += basetime.tv_sec;
        tvp->tv_usec += basetime.tv_nsec / 1000;
        while (tvp->tv_usec >= 1000000) {
                tvp->tv_usec -= 1000000;
                ++tvp->tv_sec;
        }
}

void
nanotime(struct timespec *tsp)
{
        struct globaldata *gd = mycpu;
        sysclock_t delta;

        do {
                tsp->tv_sec = gd->gd_time_seconds;
                delta = cputimer_count() - gd->gd_cpuclock_base;
        } while (tsp->tv_sec != gd->gd_time_seconds);
        tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32;

        tsp->tv_sec += basetime.tv_sec;
        tsp->tv_nsec += basetime.tv_nsec;
        while (tsp->tv_nsec >= 1000000000) {
                tsp->tv_nsec -= 1000000000;
                ++tsp->tv_sec;
        }
}

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
        pps_params_t *app;
        struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
        struct pps_kcbind_args *kapi;
#endif

        switch (cmd) {
        case PPS_IOC_CREATE:
                return (0);
        case PPS_IOC_DESTROY:
                return (0);
        case PPS_IOC_SETPARAMS:
                app = (pps_params_t *)data;
                if (app->mode & ~pps->ppscap)
                        return (EINVAL);
                pps->ppsparam = *app;
                return (0);
        case PPS_IOC_GETPARAMS:
                app = (pps_params_t *)data;
                *app = pps->ppsparam;
                app->api_version = PPS_API_VERS_1;
                return (0);
        case PPS_IOC_GETCAP:
                *(int *)data = pps->ppscap;
                return (0);
        case PPS_IOC_FETCH:
                fapi = (struct pps_fetch_args *)data;
                if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
                        return (EOPNOTSUPP);
                pps->ppsinfo.current_mode = pps->ppsparam.mode;
                fapi->pps_info_buf = pps->ppsinfo;
                return (0);
        case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
                kapi = (struct pps_kcbind_args *)data;
                /* XXX Only root should be able to do this */
                if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (kapi->kernel_consumer != PPS_KC_HARDPPS)
                        return (EINVAL);
                if (kapi->edge & ~pps->ppscap)
                        return (EINVAL);
                pps->kcmode = kapi->edge;
                return (0);
#else
                return (EOPNOTSUPP);
#endif
        default:
                return (ENOTTY);
        }
}

void
pps_init(struct pps_state *pps)
{
        pps->ppscap |= PPS_TSFMT_TSPEC;
        if (pps->ppscap & PPS_CAPTUREASSERT)
                pps->ppscap |= PPS_OFFSETASSERT;
        if (pps->ppscap & PPS_CAPTURECLEAR)
                pps->ppscap |= PPS_OFFSETCLEAR;
}

void
pps_event(struct pps_state *pps, sysclock_t count, int event)
{
        struct globaldata *gd;
        struct timespec *tsp;
        struct timespec *osp;
        struct timespec ts;
        sysclock_t *pcount;
#ifdef PPS_SYNC
        sysclock_t tcount;
#endif
        sysclock_t delta;
        pps_seq_t *pseq;
        int foff;
        int fhard;

        gd = mycpu;

        /* Things would be easier with arrays... */
        if (event == PPS_CAPTUREASSERT) {
                tsp = &pps->ppsinfo.assert_timestamp;
                osp = &pps->ppsparam.assert_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
                fhard = pps->kcmode & PPS_CAPTUREASSERT;
                pcount = &pps->ppscount[0];
                pseq = &pps->ppsinfo.assert_sequence;
        } else {
                tsp = &pps->ppsinfo.clear_timestamp;
                osp = &pps->ppsparam.clear_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
                fhard = pps->kcmode & PPS_CAPTURECLEAR;
                pcount = &pps->ppscount[1];
                pseq = &pps->ppsinfo.clear_sequence;
        }

        /* Nothing really happened */
        if (*pcount == count)
                return;

        *pcount = count;

        do {
                ts.tv_sec = gd->gd_time_seconds;
                delta = count - gd->gd_cpuclock_base;
        } while (ts.tv_sec != gd->gd_time_seconds);
        if (delta > cputimer_freq) {
                ts.tv_sec += delta / cputimer_freq;
                delta %= cputimer_freq;
        }
        ts.tv_nsec = (cputimer_freq64_nsec * delta) >> 32;
        ts.tv_sec += basetime.tv_sec;
        ts.tv_nsec += basetime.tv_nsec;
        while (ts.tv_nsec >= 1000000000) {
                ts.tv_nsec -= 1000000000;
                ++ts.tv_sec;
        }

        (*pseq)++;
        *tsp = ts;

        if (foff) {
                timespecadd(tsp, osp);
                if (tsp->tv_nsec < 0) {
                        tsp->tv_nsec += 1000000000;
                        tsp->tv_sec -= 1;
                }
        }
#ifdef PPS_SYNC
        if (fhard) {
                /* magic, at its best... */
                tcount = count - pps->ppscount[2];
                pps->ppscount[2] = count;
                delta = (cputimer_freq64_nsec * tcount) >> 32;
                hardpps(tsp, delta);
        }
#endif
}