/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $
 * $DragonFly: src/sys/kern/kern_clock.c,v 1.14 2004/01/07 20:21:46 dillon Exp $
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <sys/timepps.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/limits.h>
#include <machine/smp.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef DEVICE_POLLING
extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/*
 * Number of timecounters used to implement stable storage
 */
#ifndef NTIMECOUNTER
#define NTIMECOUNTER	5
#endif

static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
        "Timecounter stable storage");

static void initclocks (void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward (int force);
static void tco_setscales (struct timecounter *tc);
static __inline unsigned tco_delta (struct timecounter *tc);

/*
 * Some of these don't belong here, but it's easiest to concentrate them.
 * Note that cp_time[] counts in microseconds, but most userland programs
 * just compare relative times against the total by delta.
 */
long cp_time[CPUSTATES];

SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
    "LU", "CPU time statistics");

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

time_t time_second;

struct timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");

/*
 * Which update policy to use.
 *    0 - every tick; bad hardware may fail with "calcru negative..."
 *    1 - more resistant to the above hardware, but less efficient.
 */
static int tco_method;

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
        static unsigned now;
        return (++now);
}

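/*
 * Note on the initializer below (descriptive, added for clarity): the
 * members are positional -- tc_get_timecount, tc_poll_pps (none),
 * tc_counter_mask, tc_frequency (a nominal 1 MHz) and tc_name, matching
 * the field order of struct timecounter.
 */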
static struct timecounter dummy_timecounter = {
        dummy_get_timecount,
        0,
        ~0u,
        1000000,
        "dummy"
};

struct timecounter *timecounter = &dummy_timecounter;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */
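
/*
 * Illustrative example (values assumed, not mandated anywhere in this
 * file): with stathz = 128 and profhz = 1024, initclocks() computes
 * psratio = 1024 / 128 = 8, so statistics are derived from every 8th
 * profile tick while profiling is active.
 */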

int stathz;
int profhz;
static int profprocs;
int ticks;
static int psticks;     /* profiler ticks */
static int psdiv;       /* prof / stat divider */
int psratio;            /* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
        void *dummy;
{
        int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        psdiv = 1;
        cpu_initclocks();

#ifdef DEVICE_POLLING
        init_device_poll();
#endif

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.  This is implemented
 * as a FAST interrupt so it is in the context of the thread it interrupted,
 * and not in an interrupt thread.  YYY needs help.
 */
void
hardclock(frame)
        struct clockframe *frame;
{
        struct proc *p;

        p = curproc;
        if (p) {
                struct pstats *pstats;

                /*
                 * Run current process's virtual and profile time, as needed.
                 */
                pstats = p->p_stats;
                if (CLKF_USERMODE(frame) &&
                    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
                if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);
        }

#if 0 /* SMP and BETTER_CLOCK */
        forward_hardclock(pscnt);
#endif

        /*
         * If no separate statistics clock is available, run it from here.
         */
        if (stathz == 0)
                statclock(frame);

        tco_forward(0);
        ticks++;

#ifdef DEVICE_POLLING
        hardclock_device_poll();        /* this is very short and quick */
#endif /* DEVICE_POLLING */

        /*
         * Process callouts at a very low cpu priority, so we don't keep the
         * relatively high clock interrupt priority any longer than necessary.
         */
        if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
                setsoftclock();
        } else if (softticks + 1 == ticks) {
                ++softticks;
        }
}

/*
 * Compute number of ticks for the specified amount of time.  The
 * return value is intended to be used in a clock interrupt timed
 * operation and guaranteed to meet or exceed the requested time.
 * If the representation overflows, return INT_MAX.  The minimum return
 * value is 1 tick and the function will round the calculation up.
 * If any value greater than 0 microseconds is supplied, a value
 * of at least 2 will be returned to ensure that a near-term clock
 * interrupt does not cause the timeout to occur (degenerately) early.
 *
 * Note that limit checks must take into account microseconds, which is
 * done simply by using the smaller signed long maximum instead of
 * the unsigned long maximum.
 *
 * If ints have 32 bits, then the maximum value for any timeout in
 * 10ms ticks is 248 days.
 */
int
tvtohz_high(struct timeval *tv)
{
        int ticks;
        long sec, usec;

        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                if (usec > 0) {
                        sec++;
                        usec -= 1000000;
                }
                printf("tvtohz_high: negative time difference %ld sec %ld usec\n",
                       sec, usec);
#endif
                ticks = 1;
        } else if (sec <= INT_MAX / hz) {
                ticks = (int)(sec * hz +
                            ((u_long)usec + (tick - 1)) / tick) + 1;
        } else {
                ticks = INT_MAX;
        }
        return (ticks);
}
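
/*
 * Worked example (illustrative, assuming hz = 100 so tick = 10000 usec):
 *
 *        struct timeval tv = { 0, 15000 };
 *        tvtohz_high(&tv);        returns (15000 + 9999) / 10000 + 1 = 3
 *
 * The +1 ensures the timeout meets or exceeds the request even when a
 * clock interrupt is about to fire.
 */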

/*
 * Compute number of ticks for the specified amount of time, erroring on
 * the side of it being too low to ensure that sleeping the returned number
 * of ticks will not result in a late return.
 *
 * The supplied timeval may not be negative and should be normalized.  A
 * return value of 0 is possible if the timeval converts to less than
 * 1 tick.
 *
 * If ints have 32 bits, then the maximum value for any timeout in
 * 10ms ticks is 248 days.
 */
int
tvtohz_low(struct timeval *tv)
{
        int ticks;
        long sec;

        sec = tv->tv_sec;
        if (sec <= INT_MAX / hz)
                ticks = (int)(sec * hz + (u_long)tv->tv_usec / tick);
        else
                ticks = INT_MAX;
        return (ticks);
}
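
/*
 * Worked example (illustrative, again assuming hz = 100): the same
 * { 0, 15000 } timeval yields 0 * 100 + 15000 / 10000 = 1 tick here,
 * rounding down where tvtohz_high() rounds up.
 */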

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
        struct proc *p;
{
        int s;

        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
        struct proc *p;
{
        int s;

        if (p->p_flag & P_PROFIL) {
                p->p_flag &= ~P_PROFIL;
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
        }
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.  Most of the statistics are only
 * used by user-level statistics programs.  The main exceptions are
 * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
 *
 * The statclock should be called from an exclusive, fast interrupt,
 * so the context should be the thread/process that got interrupted and
 * not an interrupt thread.
 */
void
statclock(frame)
        struct clockframe *frame;
{
#ifdef GPROF
        struct gmonparam *g;
        int i;
#endif
        thread_t td;
        struct pstats *pstats;
        long rss;
        struct rusage *ru;
        struct vmspace *vm;
        struct proc *p;
        int bump;
        struct timeval tv;
        struct timeval *stv;

        /*
         * How big was our timeslice relative to the last time?
         */
        microuptime(&tv);
        stv = &mycpu->gd_stattv;
        if (stv->tv_sec == 0) {
                bump = 1;
        } else {
                bump = tv.tv_usec - stv->tv_usec +
                        (tv.tv_sec - stv->tv_sec) * 1000000;
                if (bump < 0)
                        bump = 0;
                if (bump > 1000000)
                        bump = 1000000;
        }
        *stv = tv;

        td = curthread;
        p = td->td_proc;

        if (CLKF_USERMODE(frame)) {
                /*
                 * Came from userland, handle user time and deal with
                 * possible process.
                 */
                if (p && (p->p_flag & P_PROFIL))
                        addupc_intr(p, CLKF_PC(frame), 1);
#if 0 /* SMP and BETTER_CLOCK */
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                td->td_uticks += bump;

                /*
                 * Charge the time as appropriate
                 */
                if (p && p->p_nice > NZERO)
                        cp_time[CP_NICE] += bump;
                else
                        cp_time[CP_USER] += bump;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
#if 0 /* SMP and BETTER_CLOCK */
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if (CLKF_INTR(frame))
                        td->td_iticks += bump;
                else
                        td->td_sticks += bump;

                if (CLKF_INTR(frame)) {
                        cp_time[CP_INTR] += bump;
                } else {
                        if (td == &mycpu->gd_idlethread)
                                cp_time[CP_IDLE] += bump;
                        else
                                cp_time[CP_SYS] += bump;
                }
        }

        /*
         * Bump psticks and check against gd_psticks.  When we hit the
         * 1*hz mark (psdiv ticks) we do the more expensive stuff.  If
         * psdiv changes we reset everything to avoid confusion.
         */
        ++psticks;
        if (psticks < mycpu->gd_psticks && psdiv == mycpu->gd_psdiv)
                return;

        mycpu->gd_psdiv = psdiv;
        mycpu->gd_psticks = psticks + psdiv;

        /*
         * XXX YYY DragonFly... need to rewrite all of this,
         * only schedclock is distributed at the moment
         */
        schedclock(NULL);
#ifdef SMP
        if (smp_started && invltlb_ok && !cold && !panicstr)    /* YYY */
                lwkt_send_ipiq_mask(mycpu->gd_other_cpus, schedclock, NULL);
#endif

        if (p != NULL) {
                /* Update resource usage integrals and maximums. */
                if ((pstats = p->p_stats) != NULL &&
                    (ru = &pstats->p_ru) != NULL &&
                    (vm = p->p_vmspace) != NULL) {
                        ru->ru_ixrss += pgtok(vm->vm_tsize);
                        ru->ru_idrss += pgtok(vm->vm_dsize);
                        ru->ru_isrss += pgtok(vm->vm_ssize);
                        rss = pgtok(vmspace_resident_count(vm));
                        if (ru->ru_maxrss < rss)
                                ru->ru_maxrss = rss;
                }
        }
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.tickadj = tickadj;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

        return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
            tc->tc_counter_mask);
}
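
/*
 * Illustrative note: the mask keeps counter wrap harmless.  With a
 * 16-bit counter (tc_counter_mask 0xffff), tc_offset_count 0xfff0 and
 * a current read of 0x0010, (0x0010 - 0xfff0) & 0xffff = 0x0020
 * counts have elapsed.
 */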

/*
 * We have eight functions for looking at the clock, four for
 * microseconds and four for nanoseconds.  For each there is a fast
 * but less precise version "get{nano|micro}[up]time" which will
 * return a time which is up to 1/HZ previous to the call, whereas
 * the raw version "{nano|micro}[up]time" will return a timestamp
 * which is as precise as possible.  The "up" variants return the
 * time relative to system boot; these are well suited for time
 * interval measurements.
 */
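
/*
 * Usage sketch (illustrative): measuring an interval with the monotonic
 * "up" variants, using only functions defined in this file:
 *
 *        struct timeval t1, t2;
 *        long elapsed_us;
 *
 *        microuptime(&t1);
 *        ... work being timed ...
 *        microuptime(&t2);
 *        elapsed_us = (t2.tv_sec - t1.tv_sec) * 1000000 +
 *            (t2.tv_usec - t1.tv_usec);
 */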

void
getmicrotime(struct timeval *tvp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                *tvp = tc->tc_microtime;
        } else {
                microtime(tvp);
        }
}

void
getnanotime(struct timespec *tsp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                *tsp = tc->tc_nanotime;
        } else {
                nanotime(tsp);
        }
}

void
microtime(struct timeval *tv)
{
        struct timecounter *tc;
        int delta;

        tc = timecounter;
        crit_enter();
        delta = tco_delta(tc);
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)delta * tc->tc_scale_micro) >> 32;
        crit_exit();
        tv->tv_usec += boottime.tv_usec;
        tv->tv_sec += boottime.tv_sec;
        while (tv->tv_usec < 0) {
                tv->tv_usec += 1000000;
                if (tv->tv_sec > 0)
                        tv->tv_sec--;
        }
        while (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanotime(struct timespec *ts)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = timecounter;
        crit_enter();
        ts->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        crit_exit();
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts->tv_sec += boottime.tv_sec;
        while (delta < 0) {
                delta += 1000000000;
                if (ts->tv_sec > 0)
                        ts->tv_sec--;
        }
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}

void
getmicrouptime(struct timeval *tvp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                tvp->tv_sec = tc->tc_offset_sec;
                tvp->tv_usec = tc->tc_offset_micro;
        } else {
                microuptime(tvp);
        }
}

void
getnanouptime(struct timespec *tsp)
{
        struct timecounter *tc;

        if (!tco_method) {
                tc = timecounter;
                tsp->tv_sec = tc->tc_offset_sec;
                tsp->tv_nsec = tc->tc_offset_nano >> 32;
        } else {
                nanouptime(tsp);
        }
}

void
microuptime(struct timeval *tv)
{
        struct timecounter *tc;

        tc = timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
        while (tv->tv_usec < 0) {
                tv->tv_usec += 1000000;
                if (tv->tv_sec > 0)
                        tv->tv_sec--;
        }
        while (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanouptime(struct timespec *ts)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = timecounter;
        ts->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        while (delta < 0) {
                delta += 1000000000;
                if (ts->tv_sec > 0)
                        ts->tv_sec--;
        }
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
        u_int64_t scale;

        scale = 1000000000LL << 32;
        scale += tc->tc_adjustment;
        scale /= tc->tc_tweak->tc_frequency;
        tc->tc_scale_micro = scale / 1000;
        tc->tc_scale_nano_f = scale & 0xffffffff;
        tc->tc_scale_nano_i = scale >> 32;
}
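
/*
 * Illustrative numbers (not taken from this file): the scale is
 * nanoseconds-per-count as a 32.32 fixed point value.  For an i8254
 * running at 1193182 Hz each count is roughly 838.1 ns, so
 * tc_scale_nano_i ends up as 838 and the remaining fraction lands in
 * tc_scale_nano_f; tc_scale_micro holds the same ratio in usec.
 */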

void
update_timecounter(struct timecounter *tc)
{
        tco_setscales(tc);
}

void
init_timecounter(struct timecounter *tc)
{
        struct timespec ts1;
        struct timecounter *t1, *t2, *t3;
        unsigned u;
        int i;

        u = tc->tc_frequency / tc->tc_counter_mask;
        if (u > hz) {
                printf("Timecounter \"%s\" frequency %lu Hz"
                       " -- Insufficient hz, needs at least %u\n",
                       tc->tc_name, (u_long) tc->tc_frequency, u);
                return;
        }

        tc->tc_adjustment = 0;
        tc->tc_tweak = tc;
        tco_setscales(tc);
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        if (timecounter == &dummy_timecounter)
                tc->tc_avail = tc;
        else {
                tc->tc_avail = timecounter->tc_tweak->tc_avail;
                timecounter->tc_tweak->tc_avail = tc;
        }
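
        /*
         * Build a circular ring of NTIMECOUNTER private copies of *tc
         * (descriptive comment, added for clarity).  tco_forward()
         * advances around this ring via sync_other_counter(), so readers
         * of the current copy always see a consistent snapshot; this is
         * the "stable storage" referred to above.
         */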
        MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
        tc->tc_other = t1;
        *t1 = *tc;
        t2 = t1;
        for (i = 1; i < NTIMECOUNTER; i++) {
                MALLOC(t3, struct timecounter *, sizeof *t3,
                       M_TIMECOUNTER, M_WAITOK);
                *t3 = *tc;
                t3->tc_other = t2;
                t2 = t3;
        }
        t1->tc_other = t3;
        tc = t1;

        printf("Timecounter \"%s\" frequency %lu Hz\n",
               tc->tc_name, (u_long)tc->tc_frequency);

        /* XXX: For now always start using the counter. */
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        nanouptime(&ts1);
        tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
        tc->tc_offset_micro = ts1.tv_nsec / 1000;
        tc->tc_offset_sec = ts1.tv_sec;
        timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
        struct timespec ts2;

        nanouptime(&ts2);
        boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
        boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
        if (boottime.tv_usec < 0) {
                boottime.tv_usec += 1000000;
                boottime.tv_sec--;
        }
        /* fiddle all the little crinkly bits around the fiords... */
        tco_forward(1);
}

static void
switch_timecounter(struct timecounter *newtc)
{
        int s;
        struct timecounter *tc;
        struct timespec ts;

        s = splclock();
        tc = timecounter;
        if (newtc->tc_tweak == tc->tc_tweak) {
                splx(s);
                return;
        }
        newtc = newtc->tc_tweak->tc_other;
        nanouptime(&ts);
        newtc->tc_offset_sec = ts.tv_sec;
        newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
        newtc->tc_offset_micro = ts.tv_nsec / 1000;
        newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
        tco_setscales(newtc);
        timecounter = newtc;
        splx(s);
}

static struct timecounter *
sync_other_counter(void)
{
        struct timecounter *tc, *tcn, *tco;
        unsigned delta;

        tco = timecounter;
        tc = tco->tc_other;
        tcn = tc->tc_other;
        *tc = *tco;
        tc->tc_other = tcn;
        delta = tco_delta(tc);
        tc->tc_offset_count += delta;
        tc->tc_offset_count &= tc->tc_counter_mask;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
        return (tc);
}

static void
tco_forward(int force)
{
        struct timecounter *tc, *tco;
        struct timeval tvt;

        tco = timecounter;
        tc = sync_other_counter();
        /*
         * We may be inducing a tiny error here, the tc_poll_pps() may
         * process a latched count which happens after the tco_delta()
         * in sync_other_counter(), which would extend the previous
         * counter's parameters into the domain of this new one.
         * Since the timewindow is very small for this, the error is
         * going to be only a few weenieseconds (as Dave Mills would
         * say), so let's just not talk more about it, OK?
         */
        if (tco->tc_poll_pps)
                tco->tc_poll_pps(tco);
        if (timedelta != 0) {
                tvt = boottime;
                tvt.tv_usec += tickdelta;
                if (tvt.tv_usec >= 1000000) {
                        tvt.tv_sec++;
                        tvt.tv_usec -= 1000000;
                } else if (tvt.tv_usec < 0) {
                        tvt.tv_sec--;
                        tvt.tv_usec += 1000000;
                }
                boottime = tvt;
                timedelta -= tickdelta;
        }

        while (tc->tc_offset_nano >= 1000000000ULL << 32) {
                tc->tc_offset_nano -= 1000000000ULL << 32;
                tc->tc_offset_sec++;
                ntp_update_second(tc);  /* XXX only needed if xntpd runs */
                tco_setscales(tc);
                force++;
        }

        if (tco_method && !force)
                return;

        tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

        /* Figure out the wall-clock time */
        tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
        tc->tc_nanotime.tv_nsec =
            (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
        tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
        while (tc->tc_nanotime.tv_nsec >= 1000000000) {
                tc->tc_nanotime.tv_nsec -= 1000000000;
                tc->tc_microtime.tv_usec -= 1000000;
                tc->tc_nanotime.tv_sec++;
        }
        time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

        timecounter = tc;
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0,
    "This variable determines the method used for updating timecounters. "
    "If the default algorithm (0) fails with \"calcru negative...\" messages "
    "try the alternate algorithm (1) which handles bad hardware better.");

static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
        char newname[32];
        struct timecounter *newtc, *tc;
        int error;

        tc = timecounter->tc_tweak;
        strncpy(newname, tc->tc_name, sizeof(newname));
        error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
        if (error == 0 && req->newptr != NULL &&
            strcmp(newname, tc->tc_name) != 0) {
                for (newtc = tc->tc_avail; newtc != tc;
                     newtc = newtc->tc_avail) {
                        if (strcmp(newname, newtc->tc_name) == 0) {
                                /* Warm up new timecounter. */
                                (void)newtc->tc_get_timecount(newtc);

                                switch_timecounter(newtc);
                                return (0);
                        }
                }
                return (EINVAL);
        }
        return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A", "");
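
/*
 * Usage sketch (illustrative, from userland; "i8254" assumes such a
 * counter registered itself via init_timecounter()):
 *
 *        sysctl kern.timecounter.hardware="i8254"
 *
 * Unknown names are rejected with EINVAL.
 */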

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
        pps_params_t *app;
        struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
        struct pps_kcbind_args *kapi;
#endif

        switch (cmd) {
        case PPS_IOC_CREATE:
                return (0);
        case PPS_IOC_DESTROY:
                return (0);
        case PPS_IOC_SETPARAMS:
                app = (pps_params_t *)data;
                if (app->mode & ~pps->ppscap)
                        return (EINVAL);
                pps->ppsparam = *app;
                return (0);
        case PPS_IOC_GETPARAMS:
                app = (pps_params_t *)data;
                *app = pps->ppsparam;
                app->api_version = PPS_API_VERS_1;
                return (0);
        case PPS_IOC_GETCAP:
                *(int*)data = pps->ppscap;
                return (0);
        case PPS_IOC_FETCH:
                fapi = (struct pps_fetch_args *)data;
                if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
                        return (EOPNOTSUPP);
                pps->ppsinfo.current_mode = pps->ppsparam.mode;
                fapi->pps_info_buf = pps->ppsinfo;
                return (0);
        case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
                kapi = (struct pps_kcbind_args *)data;
                /* XXX Only root should be able to do this */
                if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
                        return (EINVAL);
                if (kapi->kernel_consumer != PPS_KC_HARDPPS)
                        return (EINVAL);
                if (kapi->edge & ~pps->ppscap)
                        return (EINVAL);
                pps->kcmode = kapi->edge;
                return (0);
#else
                return (EOPNOTSUPP);
#endif
        default:
                return (ENOTTY);
        }
}
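
/*
 * Illustrative sketch (assumes a driver exporting the RFC 2783 PPS API
 * on top of this code; not part of the original file).  Userland would
 * typically do:
 *
 *        pps_handle_t h;
 *        pps_info_t info;
 *        struct timespec timeout = { 0, 0 };
 *
 *        time_pps_create(fd, &h);
 *        time_pps_fetch(h, PPS_TSFMT_TSPEC, &info, &timeout);
 *
 * which reaches pps_ioctl() above as PPS_IOC_CREATE and PPS_IOC_FETCH.
 */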

void
pps_init(struct pps_state *pps)
{
        pps->ppscap |= PPS_TSFMT_TSPEC;
        if (pps->ppscap & PPS_CAPTUREASSERT)
                pps->ppscap |= PPS_OFFSETASSERT;
        if (pps->ppscap & PPS_CAPTURECLEAR)
                pps->ppscap |= PPS_OFFSETCLEAR;
}

void
pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event)
{
        struct timespec ts, *tsp, *osp;
        u_int64_t delta;
        unsigned tcount, *pcount;
        int foff, fhard;
        pps_seq_t *pseq;

        /* Things would be easier with arrays... */
        if (event == PPS_CAPTUREASSERT) {
                tsp = &pps->ppsinfo.assert_timestamp;
                osp = &pps->ppsparam.assert_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
                fhard = pps->kcmode & PPS_CAPTUREASSERT;
                pcount = &pps->ppscount[0];
                pseq = &pps->ppsinfo.assert_sequence;
        } else {
                tsp = &pps->ppsinfo.clear_timestamp;
                osp = &pps->ppsparam.clear_offset;
                foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
                fhard = pps->kcmode & PPS_CAPTURECLEAR;
                pcount = &pps->ppscount[1];
                pseq = &pps->ppsinfo.clear_sequence;
        }

        /* The timecounter changed: bail */
        if (!pps->ppstc ||
            pps->ppstc->tc_name != tc->tc_name ||
            tc->tc_name != timecounter->tc_name) {
                pps->ppstc = tc;
                *pcount = count;
                return;
        }

        /* Nothing really happened */
        if (*pcount == count)
                return;

        *pcount = count;

        /* Convert the count to timespec */
        ts.tv_sec = tc->tc_offset_sec;
        tcount = count - tc->tc_offset_count;
        tcount &= tc->tc_counter_mask;
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)tcount * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)tcount * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts.tv_sec += boottime.tv_sec;
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts.tv_sec++;
        }
        ts.tv_nsec = delta;

        (*pseq)++;
        *tsp = ts;

        if (foff) {
                timespecadd(tsp, osp);
                if (tsp->tv_nsec < 0) {
                        tsp->tv_nsec += 1000000000;
                        tsp->tv_sec -= 1;
                }
        }
#ifdef PPS_SYNC
        if (fhard) {
                /* magic, at its best... */
                tcount = count - pps->ppscount[2];
                pps->ppscount[2] = count;
                tcount &= tc->tc_counter_mask;
                delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f);
                delta >>= 32;
                delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i);
                hardpps(tsp, delta);
        }
#endif
}