2 * Copyright (c) 1990 The Regents of the University of California.
3 * Copyright (c) 2008-2021 The DragonFly Project. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * William Jolitz and Don Ahn.
8 * This code is derived from software contributed to The DragonFly Project
9 * by Matthew Dillon <dillon@backplane.com>
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
36 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
40 * Routines to handle clock hardware.
44 * inittodr, settodr and support routines written
45 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
47 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
51 #include "opt_clock.h"
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/eventhandler.h>
58 #include <sys/kernel.h>
60 #include <sys/sysctl.h>
63 #include <sys/systimer.h>
64 #include <sys/globaldata.h>
65 #include <sys/machintr.h>
66 #include <sys/interrupt.h>
68 #include <sys/thread2.h>
70 #include <machine/clock.h>
71 #include <machine/cputypes.h>
72 #include <machine/frame.h>
73 #include <machine/ipl.h>
74 #include <machine/limits.h>
75 #include <machine/md_var.h>
76 #include <machine/psl.h>
77 #include <machine/segments.h>
78 #include <machine/smp.h>
79 #include <machine/specialreg.h>
80 #include <machine/intr_machdep.h>
82 #include <machine_base/apic/ioapic.h>
83 #include <machine_base/apic/ioapic_abi.h>
84 #include <machine_base/icu/icu.h>
85 #include <bus/isa/isa.h>
86 #include <bus/isa/rtc.h>
87 #include <machine_base/isa/timerreg.h>
/*
 * NOTE(review): this capture appears to have source lines elided throughout
 * (the embedded original line numbers are non-contiguous) — verify against
 * the full file before relying on any fragment below.
 */
/* Linker set of optional timecounter configure hooks, run at boot. */
89 SET_DECLARE(timecounter_init_set, const timecounter_init_t);
/* Placeholder entry so the linker set is never empty. */
90 TIMECOUNTER_INIT(placeholder, NULL);
92 static void i8254_restore(void);
93 static void resettodr_on_shutdown(void *arg __unused);
96 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
97 * can use a simple formula for leap years.
99 #define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
100 #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)
/* Nominal i8254 input clock in Hz (standard PC value 1.193182 MHz). */
103 #define TIMER_FREQ 1193182
/* Which 8254 counter (SEL1/SEL2) and I/O port serve as the wall timer. */
106 static uint8_t i8254_walltimer_sel;
107 static uint16_t i8254_walltimer_cntr;
/* Non-zero once timer0 has been programmed for one-shot interrupts. */
108 static int timer0_running;
110 int adjkerntz; /* local offset from GMT in seconds */
111 int disable_rtc_set; /* disable resettodr() if != 0 */
115 int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
/* Measured TSC frequency and an approximate ticks-per-microsecond value. */
116 tsc_uclock_t tsc_frequency;
117 tsc_uclock_t tsc_oneus_approx; /* always at least 1, approx only */
/* Ownership state for each of the three 8254 counters. */
119 enum tstate { RELEASED, ACQUIRED };
120 static enum tstate timer0_state;
121 static enum tstate timer1_state;
122 static enum tstate timer2_state;
124 int i8254_cputimer_disable; /* No need to initialize i8254 cputimer. */
126 static int beeping = 0;
127 static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
/* Shadow copies of MC146818A RTC control registers A and B. */
128 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
129 static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
130 static int rtc_loaded;
/* Precomputed divisor for system-tick -> 8254-tick conversion (0 = use muldivu64). */
132 static sysclock_t i8254_cputimer_div;
134 static int i8254_nointr;
/* Tunables controlling interrupt use and the calibration strategies below. */
135 static int i8254_intr_disable = 1;
136 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
138 static int calibrate_timers_with_rtc = 0;
139 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
141 static int calibrate_tsc_fast = 1;
142 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);
144 static int calibrate_test;
145 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);
/* Callout used to stop the speaker after sysbeep(). */
147 static struct callout sysbeepstop_ch;
149 static sysclock_t i8254_cputimer_count(void);
150 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
151 static void i8254_cputimer_destruct(struct cputimer *cputimer);
/*
 * The i8254 as a free-running system cputimer. The .type field is filled
 * in by i8254_cputimer_construct() once timer 1 or 2 is chosen.
 */
153 static struct cputimer i8254_cputimer = {
154 .next = SLIST_ENTRY_INITIALIZER,
156 .pri = CPUTIMER_PRI_8254,
157 .type = 0, /* determined later */
158 .count = i8254_cputimer_count,
159 .fromhz = cputimer_default_fromhz,
160 .fromus = cputimer_default_fromus,
161 .construct = i8254_cputimer_construct,
162 .destruct = i8254_cputimer_destruct,
166 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
167 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
168 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
/*
 * The i8254 timer0 as the interrupt cputimer (one-shot reload driver for
 * systimers); non-overridden ops use the cputimer_intr defaults.
 */
170 static struct cputimer_intr i8254_cputimer_intr = {
172 .reload = i8254_intr_reload,
173 .enable = cputimer_intr_default_enable,
174 .config = i8254_intr_config,
175 .restart = cputimer_intr_default_restart,
176 .pmfixup = cputimer_intr_default_pmfixup,
177 .initclock = i8254_intr_initclock,
179 .next = SLIST_ENTRY_INITIALIZER,
181 .type = CPUTIMER_INTR_8254,
182 .prio = CPUTIMER_INTR_PRIO_8254,
183 .caps = CPUTIMER_INTR_CAP_PS,
188 * Use this to lwkt_switch() when the scheduler clock is not
189 * yet running, otherwise lwkt_switch() won't do anything.
190 * XXX needs cleaning up in lwkt_thread.c
/* Manually drive the scheduler clock for the current thread. */
193 lwkt_force_switch(void)
196 lwkt_schedulerclock(curthread);
202 * timer0 clock interrupt. Timer0 is in one-shot mode and has stopped
203 * counting as of this interrupt. We use timer1 in free-running mode (not
204 * generating any interrupts) as our main counter. Each cpu has timeouts
207 * This code is INTR_MPSAFE and may be called without the BGL held.
/*
 * Dispatch systimer processing: run locally for this cpu, IPI the others
 * that have queued systimers.
 */
210 clkintr(void *dummy, void *frame_arg)
212 static sysclock_t sysclock_count; /* NOTE! Must be static */
213 struct globaldata *gd = mycpu;
214 struct globaldata *gscan;
218 * SWSTROBE mode is a one-shot, the timer is no longer running
223 * XXX the dispatcher needs work. right now we call systimer_intr()
224 * directly or via IPI for any cpu with systimers queued, which is
225 * usually *ALL* of them. We need to use the LAPIC timer for this.
/* Snapshot the timebase once; all cpus are dispatched against it. */
227 sysclock_count = sys_cputimer->count();
228 for (n = 0; n < ncpus; ++n) {
229 gscan = globaldata_find(n);
/* Skip cpus with no systimers queued. */
230 if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
233 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
/* Local cpu: call systimer_intr() directly with the trap frame. */
236 systimer_intr(&sysclock_count, 0, frame_arg);
/*
 * Claim 8254 timer2 (speaker timer) and program its mode; fails if it is
 * already owned. release_timer2() below restores square-wave mode and
 * returns it to the RELEASED state.
 */
246 acquire_timer2(int mode)
248 if (timer2_state != RELEASED)
250 timer2_state = ACQUIRED;
253 * This access to the timer registers is as atomic as possible
254 * because it is a single instruction. We could do better if we
/* Low 6 bits of 'mode' select the counting mode; SEL2 picks counter 2. */
257 outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
264 if (timer2_state != ACQUIRED)
266 outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
267 timer2_state = RELEASED;
/* Kernel debugger command: dump raw RTC date/time and status registers. */
275 DB_SHOW_COMMAND(rtc, rtc)
277 kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
278 rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
279 rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
280 rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
285 * Return the current cpu timer count as a 32 bit integer.
/*
 * Latch the wall timer, read the 16-bit countdown, negate it into a
 * count-up value, and accumulate 16-bit rollovers into .base. The static
 * cputimer_last preserves the previous reading across calls for rollover
 * detection (serialization is presumably handled by the caller/lock not
 * visible here — confirm in full source).
 */
289 i8254_cputimer_count(void)
291 static uint16_t cputimer_last;
296 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
297 count = (uint8_t)inb(i8254_walltimer_cntr); /* get countdown */
298 count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
299 count = -count; /* -> countup */
300 if (count < cputimer_last) /* rollover */
301 i8254_cputimer.base += 0x00010000U;
302 ret = i8254_cputimer.base | count;
303 cputimer_last = count;
310 * This function is called whenever the system timebase changes, allowing
311 * us to calculate what is needed to convert a system timebase tick
312 * into an 8254 tick for the interrupt timer. If we can convert to a
313 * simple shift, multiplication, or division, we do so. Otherwise 64
314 * bit arithmetic is required every time the interrupt timer is reloaded.
317 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
323 * Will a simple divide do the trick?
/* Rounded integer ratio of system timebase to 8254 interrupt frequency. */
325 div = (timer->freq + (cti->freq / 2)) / cti->freq;
326 freq = cti->freq * div;
/* Accept the fast path only if the ratio is exact to within +/-1 Hz. */
328 if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
329 i8254_cputimer_div = div;
331 i8254_cputimer_div = 0;
335 * Reload for the next timeout. It is possible for the reload value
336 * to be 0 or negative, indicating that an immediate timer interrupt
337 * is desired. For now make the minimum 2 ticks.
339 * We may have to convert from the system timebase to the 8254 timebase.
342 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
/* Negative (wrapped) reload means "fire immediately". */
346 if ((ssysclock_t)reload < 0)
/* Fast path uses the divisor precomputed by i8254_intr_config(). */
348 if (i8254_cputimer_div)
349 reload /= i8254_cputimer_div;
351 reload = muldivu64(reload, cti->freq, sys_cputimer->freq);
/* Clamp: at least 2 ticks, at most almost a full 16-bit count. */
354 reload = 2; /* minimum count */
356 reload = 0xFFFF; /* almost full count (0 is full count) */
/* If timer0 is already counting, only rearm when the new deadline is sooner. */
359 if (timer0_running) {
360 outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); /* count-down timer */
361 count = (uint8_t)inb(TIMER_CNTR0); /* lsb */
362 count |= ((uint8_t)inb(TIMER_CNTR0) << 8); /* msb */
363 if (reload < count) {
364 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
365 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */
366 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
/* Timer not running: program the one-shot unconditionally. */
370 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
371 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */
372 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
378 * DELAY(usec) - Spin for the specified number of microseconds.
379 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
380 * but do a thread switch in the loop
382 * Relies on timer 1 counting down from (cputimer_freq / hz)
383 * Note: timer had better have been programmed before this is first used!
/*
 * Common spin-delay engine for DELAY()/DRIVERSLEEP(); 'doswitch' selects
 * whether to lwkt_switch() while waiting.
 */
386 DODELAY(int n, int doswitch)
388 ssysclock_t delta, ticks_left;
389 sysclock_t prev_tick, tick;
/* One-time self-test state (debug path printing DELAY timing). */
394 static int state = 0;
398 for (n1 = 1; n1 <= 10000000; n1 *= 10)
403 kprintf("DELAY(%d)...", n);
406 * Guard against the timer being uninitialized if we are called
407 * early for console i/o.
409 if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
413 * Read the counter first, so that the rest of the setup overhead is
414 * counted. Then calculate the number of hardware timer ticks
415 * required, rounding up to be sure we delay at least the requested
416 * number of microseconds.
418 prev_tick = sys_cputimer->count();
/* Round the frequency up so the delay is never shorter than requested. */
419 ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);
424 while (ticks_left > 0) {
425 tick = sys_cputimer->count();
429 delta = tick - prev_tick;
434 if (doswitch && ticks_left > 0)
440 kprintf(" %d calls to getit() at %d usec each\n",
441 getit_calls, (n + 5) / getit_calls);
446 * DELAY() never switches.
455 * Returns non-zero if the specified time period has elapsed. Call
456 * first with last_clock set to 0.
/*
 * Poll-style timeout check against sys_cputimer; state lives in *tdd.
 */
459 CHECKTIMEOUT(TOTALDELAY *tdd)
/* First call: arm the timer baseline. */
464 if (tdd->started == 0) {
465 if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
467 tdd->last_clock = sys_cputimer->count();
/*
 * Convert elapsed ticks to whole microseconds, then advance last_clock
 * by only the ticks accounted for, so fractional ticks are not lost.
 */
471 delta = sys_cputimer->count() - tdd->last_clock;
472 us = muldivu64(delta, 1000000, sys_cputimer->freq);
473 tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
/* tdd->us counts down; negative means the period has elapsed. */
476 return (tdd->us < 0);
481 * DRIVERSLEEP() does not switch if called with a spinlock held or
482 * from a hard interrupt.
/* Delay helper that may thread-switch, unless switching is unsafe here. */
485 DRIVERSLEEP(int usec)
487 globaldata_t gd = mycpu;
/* In interrupt context or holding spinlocks: must not switch. */
489 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
/* Callout handler: silence the PC speaker (clears PPI gate/data bits). */
497 sysbeepstop(void *chan)
499 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */
/*
 * Beep the PC speaker: acquire timer2 in square-wave mode, load the pitch
 * divisor, enable speaker output, and schedule sysbeepstop() after
 * 'period' callout ticks.
 */
505 sysbeep(int pitch, int period)
507 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
509 if (sysbeep_enable == 0)
512 * Nobody else is using timer2, we do not need the clock lock
/* Load 16-bit pitch divisor, LSB then MSB. */
514 outb(TIMER_CNTR2, pitch);
515 outb(TIMER_CNTR2, (pitch>>8));
517 /* enable counter2 output to speaker */
518 outb(IO_PPI, inb(IO_PPI) | 3);
520 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
526 * RTC support routines
/* (rtcin tail) read the data port after selecting the register. */
537 val = inb(IO_RTC + 1);
/* Write 'val' to RTC register 'reg' via the index/data port pair. */
544 writertc(u_char reg, u_char val)
550 outb(IO_RTC + 1, val);
551 inb(0x84); /* XXX work around wrong order in rtcin() */
/* readrtc(): fetch a BCD RTC register and convert it to binary. */
558 return(bcd2bin(rtcin(port)));
/*
 * Calibrate the i8254 (and, when available, the TSC) against the RTC
 * seconds counter. Returns the measured i8254 frequency, or the default
 * i8254 frequency on failure.
 */
562 calibrate_clocks(void)
564 tsc_uclock_t old_tsc;
565 sysclock_t tot_count;
566 sysclock_t count, prev_count;
567 int sec, start_sec, timeout;
570 kprintf("Calibrating clock(s) ...\n");
/* Bail if the RTC has no valid power/oscillator status. */
571 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
575 /* Read the mc146818A seconds counter. */
577 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
578 sec = rtcin(RTC_SEC);
585 /* Wait for the mC146818A seconds counter to change. */
588 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
589 sec = rtcin(RTC_SEC);
590 if (sec != start_sec)
597 /* Start keeping track of the i8254 counter. */
598 prev_count = sys_cputimer->count();
604 old_tsc = 0; /* shut up gcc */
607 * Wait for the mc146818A seconds counter to change. Read the i8254
608 * counter for each iteration since this is convenient and only
609 * costs a few usec of inaccuracy. The timing of the final reads
610 * of the counters almost matches the timing of the initial reads,
611 * so the main cause of inaccuracy is the varying latency from
612 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
613 * rtcin(RTC_SEC) that returns a changed seconds count. The
614 * maximum inaccuracy from this cause is < 10 usec on 486's.
618 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
619 sec = rtcin(RTC_SEC);
620 count = sys_cputimer->count();
/* Accumulate elapsed cputimer ticks over the measured RTC second(s). */
621 tot_count += (sysclock_t)(count - prev_count);
623 if (sec != start_sec)
630 * Read the cpu cycle counter. The timing considerations are
631 * similar to those for the i8254 clock.
634 tsc_frequency = rdtsc() - old_tsc;
636 kprintf("TSC clock: %jd Hz (Method A)\n",
637 (intmax_t)tsc_frequency);
/* |1 guards against a zero frequency; rounds up to >= 1 tick/us. */
640 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
642 kprintf("i8254 clock: %lu Hz\n", tot_count);
/* Failure path: fall back to the default i8254 frequency. */
646 kprintf("failed, using default i8254 clock of %lu Hz\n",
647 i8254_cputimer.freq);
648 return (i8254_cputimer.freq);
/*
 * NOTE(review): the enclosing function header is not visible in this
 * capture — this fragment acquires timer0, programs it for one-shot
 * (SWSTROBE) interrupts, and registers/selects the i8254 interrupt and
 * free-running cputimers. Confirm the function identity in the full file.
 */
654 timer0_state = ACQUIRED;
659 * Timer0 is our fine-grained variable clock interrupt
661 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
662 outb(TIMER_CNTR0, 2); /* lsb */
663 outb(TIMER_CNTR0, 0); /* msb */
667 cputimer_intr_register(&i8254_cputimer_intr);
668 cputimer_intr_select(&i8254_cputimer_intr, 0);
672 * Timer1 or timer2 is our free-running clock, but only if another
673 * has not been selected.
675 cputimer_register(&i8254_cputimer);
676 cputimer_select(&i8254_cputimer, 0);
/*
 * cputimer construct op: pick timer 1 or 2 as the free-running wall
 * timer (tunable hw.i8254.walltimer), program it in rate-generator mode
 * with a full 2^16 period, and carry the old clock base forward.
 */
680 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
685 * Should we use timer 1 or timer 2 ?
688 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
689 if (which != 1 && which != 2)
694 timer->name = "i8254_timer1";
695 timer->type = CPUTIMER_8254_SEL1;
696 i8254_walltimer_sel = TIMER_SEL1;
697 i8254_walltimer_cntr = TIMER_CNTR1;
698 timer1_state = ACQUIRED;
701 timer->name = "i8254_timer2";
702 timer->type = CPUTIMER_8254_SEL2;
703 i8254_walltimer_sel = TIMER_SEL2;
704 i8254_walltimer_cntr = TIMER_CNTR2;
705 timer2_state = ACQUIRED;
/* Round the inherited base up to a 64K boundary (low 16 bits come from HW). */
709 timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;
712 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
713 outb(i8254_walltimer_cntr, 0); /* lsb */
714 outb(i8254_walltimer_cntr, 0); /* msb */
715 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */
/* cputimer destruct op: release whichever 8254 counter was in use. */
720 i8254_cputimer_destruct(struct cputimer *timer)
722 switch(timer->type) {
723 case CPUTIMER_8254_SEL1:
724 timer1_state = RELEASED;
726 case CPUTIMER_8254_SEL2:
727 timer2_state = RELEASED;
/* (rtc_restore body fragment, per surrounding context — confirm in full file) */
738 /* Restore all of the RTC's "status" (actually, control) registers. */
739 writertc(RTC_STATUSB, RTCSB_24HR);
740 writertc(RTC_STATUSA, rtc_statusa);
741 writertc(RTC_STATUSB, rtc_statusb);
745 * Restore all the timers.
747 * This function is called to resynchronize our core timekeeping after a
748 * long halt, e.g. from apm_default_resume() and friends. It is also
749 * called if after a BIOS call we have detected munging of the 8254.
750 * It is necessary because cputimer_count() counter's delta may have grown
751 * too large for nanouptime() and friends to handle, or (in the case of 8254
752 * munging) might cause the SYSTIMER code to prematurely trigger.
758 if (i8254_cputimer_disable == 0)
759 i8254_restore(); /* restore timer_freq and hz */
760 rtc_restore(); /* reenable RTC interrupts */
764 #define MAX_MEASURE_RETRIES 100
/*
 * Take one (TSC, cputimer) sample pair, retrying until the two rdtsc
 * reads bracket the counter read within a latency threshold; the
 * threshold is relaxed after half the retries are used. Stores the
 * bracketing latency in *latency.
 */
767 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
770 u_int64_t tsc1, tsc2;
/* Loosen the acceptance threshold once retries run low. */
776 if (cnt > MAX_MEASURE_RETRIES/2)
777 threshold = timer_latency << 1;
779 threshold = timer_latency + (timer_latency >> 2);
782 tsc1 = rdtsc_ordered();
783 val = sys_cputimer->count();
784 tsc2 = rdtsc_ordered();
785 } while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
786 tsc2 - tsc1 > threshold);
789 *latency = tsc2 - tsc1;
/*
 * Measure the TSC frequency against sys_cputimer over 'usecs'. The fast
 * path takes duplicated latency-compensated samples at both ends and
 * averages two independent frequency estimates; the slow path simply
 * brackets a DELAY with rdtsc reads.
 */
795 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
797 if (calibrate_tsc_fast) {
798 u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
799 u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
800 u_int64_t freq1, freq2;
801 sysclock_t start1, end1, start2, end2;
802 int retries1, retries2, retries3, retries4;
/* Two start samples and two end samples bracket the wait period. */
805 old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
808 old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
811 new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
814 new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
/* Compensate the TSC deltas for the measured sampling latencies. */
817 old_tsc1 += start_lat1;
818 old_tsc2 += start_lat2;
819 freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
820 freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
823 /* This should in practice be safe from overflows. */
824 freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
825 freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
826 if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
827 kprintf("%s: retries: %d, %d, %d, %d\n",
828 __func__, retries1, retries2, retries3, retries4);
830 if (calibrate_test) {
831 kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
832 __func__, freq1, freq2, (freq1 + freq2) / 2);
/* Report the mean of the two independent estimates. */
834 return (freq1 + freq2) / 2;
/* Slow path: plain rdtsc bracket scaled from usecs to Hz. */
836 u_int64_t old_tsc, new_tsc;
839 old_tsc = rdtsc_ordered();
842 freq = new_tsc - old_tsc;
843 /* This should in practice be safe from overflows. */
844 freq = (freq * 1000 * 1000) / usecs;
850 * Initialize 8254 timer 0 early so that it can be used in DELAY().
/*
 * Boot-time clock bring-up: detect/force TSC invariance, initialize the
 * RTC control registers, run the timecounter linker-set hooks, program
 * the 8254, and calibrate the i8254 and TSC frequencies.
 */
855 const timecounter_init_t **list;
856 sysclock_t delta, freq;
858 callout_init_mp(&sysbeepstop_ch);
861 * Can we use the TSC?
863 * NOTE: If running under qemu, probably a good idea to force the
864 * TSC because we are not likely to detect it as being
865 * invariant or mpsyncd if you don't. This will greatly
866 * reduce SMP contention.
868 if (cpu_feature & CPUID_TSC) {
870 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
/* Query the Advanced Power Management leaf for invariant-TSC capability. */
872 if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
873 cpu_vendor_id == CPU_VENDOR_AMD) &&
874 cpu_exthigh >= 0x80000007) {
877 do_cpuid(0x80000007, regs);
886 * Initial RTC state, don't do anything unexpected
888 writertc(RTC_STATUSA, rtc_statusa);
889 writertc(RTC_STATUSB, RTCSB_24HR);
/* Run any registered timecounter configure hooks. */
891 SET_FOREACH(list, timecounter_init_set) {
892 if ((*list)->configure != NULL)
893 (*list)->configure();
897 * If tsc_frequency is already initialized now, and a flag is set
898 * that i8254 timer is unneeded, we are done.
900 if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
904 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
905 * generate an interrupt, which we will ignore for now.
907 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
908 * (so it counts a full 2^16 and repeats). We will use this timer
911 if (i8254_cputimer_disable == 0)
914 kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);
917 * When booting without verbose messages, it's pointless to run the
918 * calibrate_clocks() calibration code, when we don't use the
919 * results in any way. With bootverbose, we are at least printing
920 * this information to the kernel log.
922 if (i8254_cputimer_disable != 0 ||
923 (calibrate_timers_with_rtc == 0 && !bootverbose)) {
927 freq = calibrate_clocks();
928 #ifdef CLK_CALIBRATION_LOOP
/* Optional interactive loop: repeat calibration until a key is pressed. */
933 kprintf("Press a key on the console to "
934 "abort clock calibration\n");
935 while ((c = cncheckc()) == -1 || c == NOKEY)
942 * Use the calibrated i8254 frequency if it seems reasonable.
943 * Otherwise use the default, and don't use the calibrated i586
/* Accept the RTC-calibrated value only if within 1% of the default. */
946 delta = freq > i8254_cputimer.freq ?
947 freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
948 if (delta < i8254_cputimer.freq / 100) {
949 if (calibrate_timers_with_rtc == 0) {
951 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
952 freq = i8254_cputimer.freq;
956 * Interrupt timer's freq must be adjusted
957 * before we change the cuptimer's frequency.
959 i8254_cputimer_intr.freq = freq;
960 cputimer_set_frequency(&i8254_cputimer, freq);
963 kprintf("%lu Hz differs from default of %lu Hz "
964 "by more than 1%%\n",
965 freq, i8254_cputimer.freq);
969 if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
970 kprintf("hw.calibrate_timers_with_rtc not "
971 "set - using old calibration method\n");
/* TSC present but not yet calibrated: measure read latency first. */
976 if (tsc_present && tsc_frequency == 0) {
978 u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
/* Warm up the cputimer read path before timing it. */
981 for (i = 0; i < 10; i++) {
983 (void)sys_cputimer->count();
985 for (i = 0; i < 100; i++) {
986 u_int64_t old_tsc, new_tsc;
988 old_tsc = rdtsc_ordered();
989 (void)sys_cputimer->count();
990 new_tsc = rdtsc_ordered();
991 cputime_latency_tsc += (new_tsc - old_tsc);
992 if (max < (new_tsc - old_tsc))
993 max = new_tsc - old_tsc;
994 if (min == 0 || min > (new_tsc - old_tsc))
995 min = new_tsc - old_tsc;
997 cputime_latency_tsc /= 100;
999 "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
1000 cputime_latency_tsc, min, max);
1001 /* XXX Instead of this, properly filter out outliers. */
1002 cputime_latency_tsc = min;
/* Optional test mode: calibrate at 100ms..2s and report each estimate. */
1004 if (calibrate_test > 0) {
1005 u_int64_t values[20], avg = 0;
1006 for (i = 1; i <= 20; i++) {
1009 freq = do_calibrate_cputimer(i * 100 * 1000,
1010 cputime_latency_tsc);
1011 values[i - 1] = freq;
1013 /* Compute an average TSC for the 1s to 2s delays. */
1014 for (i = 10; i < 20; i++)
1017 for (i = 0; i < 20; i++) {
1018 kprintf("%ums: %lu (Diff from average: %ld)\n",
1019 (i + 1) * 100, values[i],
1020 (int64_t)(values[i] - avg));
1024 if (calibrate_tsc_fast > 0) {
1025 /* HPET would typically be >10MHz */
1026 if (sys_cputimer->freq >= 10000000)
1034 tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
1035 if (bootverbose && calibrate_timers_with_rtc) {
1036 kprintf("TSC clock: %jd Hz (Method B)\n",
1037 (intmax_t)tsc_frequency);
1043 kprintf("TSC%s clock: %jd Hz\n",
1044 tsc_invariant ? " invariant" : "",
1045 (intmax_t)tsc_frequency);
/* |1 guards against zero; rounds up to >= 1 TSC tick per microsecond. */
1047 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
/* Arrange to write the clock back to the RTC on clean shutdown. */
1049 EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
1050 NULL, SHUTDOWN_PRI_LAST);
1054 * Sync the time of day back to the RTC on shutdown, but only if
1055 * we have already loaded it and have not crashed.
1058 resettodr_on_shutdown(void *arg __unused)
1060 if (rtc_loaded && panicstr == NULL) {
1066 * Initialize the time of day register, based on the time base which is, e.g.
1067 * from a filesystem.
1070 inittodr(time_t base)
1072 unsigned long sec, days;
1083 /* Look if we have a RTC present and the time is valid */
1084 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
1087 /* wait for time update to complete */
1088 /* If RTCSA_TUP is zero, we have at least 244us before next update */
1090 while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
/* Assemble a calendar date from the BCD RTC registers. */
1096 #ifdef USE_RTC_CENTURY
1097 year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
1099 year = readrtc(RTC_YEAR) + 1900;
1107 month = readrtc(RTC_MONTH);
1108 for (m = 1; m < month; m++)
1109 days += daysinmonth[m-1];
1110 if ((month > 2) && LEAPYEAR(year))
1112 days += readrtc(RTC_DAY) - 1;
/* Add whole days for every year since the epoch. */
1113 for (y = 1970; y < year; y++)
1114 days += DAYSPERYEAR + LEAPYEAR(y);
1115 sec = ((( days * 24 +
1116 readrtc(RTC_HRS)) * 60 +
1117 readrtc(RTC_MIN)) * 60 +
1119 /* sec now contains the number of seconds, since Jan 1 1970,
1120 in the local time zone */
/* Convert local time to UTC using tz offset and CMOS-clock policy. */
1122 sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
/* If more than 2 seconds off from the current system time, step it. */
1124 y = (int)(time_second - sec);
1125 if (y <= -2 || y >= 2) {
1126 /* badly off, adjust it */
1136 kprintf("Invalid time in real time clock.\n");
1137 kprintf("Check and reset the date immediately!\n");
1141 * Write system time back to RTC
1151 if (disable_rtc_set)
1158 /* Disable RTC updates and interrupts. */
1159 writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
1161 /* Calculate local time to put in RTC */
1163 tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
/* Peel off seconds, minutes, hours; 'tm' then holds days since the epoch. */
1165 writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
1166 writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
1167 writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
1169 /* We have now the days since 01-01-1970 in tm */
1170 writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */
/* Walk forward year by year until fewer than a year of days remains. */
1171 for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1173 y++, m = DAYSPERYEAR + LEAPYEAR(y))
1176 /* Now we have the years in y and the day-of-the-year in tm */
1177 writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
1178 #ifdef USE_RTC_CENTURY
1179 writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
/* Walk the months, honoring February in leap years. */
1181 for (m = 0; ; m++) {
1184 ml = daysinmonth[m];
1185 if (m == 1 && LEAPYEAR(y))
1192 writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
1193 writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
1195 /* Reenable RTC updates and interrupts. */
1196 writertc(RTC_STATUSB, rtc_statusb);
/*
 * Verify that an 8254 timer0 interrupt is actually delivered through the
 * chosen IOAPIC pin: force a near-immediate reload, busy-wait ~10ms, and
 * compare the per-cpu interrupt counter. Returns success/failure.
 */
1201 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1207 * Following code assumes the 8254 is the cpu timer,
1208 * so make sure it is.
1210 /*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
1211 KKASSERT(cti == &i8254_cputimer_intr);
1213 lastcnt = get_interrupt_counter(irq, mycpuid);
1216 * Force an 8254 Timer0 interrupt and wait 1/100s for
1217 * it to happen, then see if we got it.
1219 kprintf("IOAPIC: testing 8254 interrupt delivery...");
1221 i8254_intr_reload(cti, sys_cputimer->fromus(2));
1222 base = sys_cputimer->count();
1223 while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
/* No change in the interrupt counter means delivery failed. */
1226 if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
1227 kprintf(" failed\n");
1230 kprintf(" success\n");
1236 * Start both clocks running. DragonFly note: the stat clock is no longer
1237 * used. Instead, 8254 based systimers are used for all major clock
/*
 * initclock op: hook up the 8254 timer0 interrupt (irq0), preferring the
 * IOAPIC path with an ExtINT/mixed-mode fallback, register clkintr(),
 * and validate delivery via i8254_ioapic_trial().
 */
1241 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1243 void *clkdesc = NULL;
1244 int irq = 0, mixed_mode = 0, error;
1246 KKASSERT(mycpuid == 0);
/* Honor hw.i8254.intr_disable unless we were explicitly selected. */
1248 if (!selected && i8254_intr_disable)
1252 * The stat interrupt mask is different without the
1253 * statistics clock. Also, don't set the interrupt
1254 * flag which would normally cause the RTC to generate
1257 rtc_statusb = RTCSB_24HR;
1259 /* Finish initializing 8254 timer 0. */
1260 if (ioapic_enable) {
1261 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1262 INTR_POLARITY_HIGH);
/* Pin not found: try legacy ExtINT (mixed-mode) routing for irq0. */
1265 error = ioapic_conf_legacy_extint(0);
1267 irq = machintr_legacy_intr_find(0,
1268 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1275 kprintf("IOAPIC: setup mixed mode for "
1276 "irq 0 failed: %d\n", error);
1279 panic("IOAPIC: setup mixed mode for "
1280 "irq 0 failed: %d\n", error);
1285 clkdesc = register_int(irq, clkintr, NULL, "clk",
1287 INTR_EXCL | INTR_CLOCK |
1288 INTR_NOPOLL | INTR_MPSAFE |
/* Legacy ICU path: register clkintr() directly on irq 0. */
1291 register_int(0, clkintr, NULL, "clk", NULL,
1292 INTR_EXCL | INTR_CLOCK |
1293 INTR_NOPOLL | INTR_MPSAFE |
1297 /* Initialize RTC. */
1298 writertc(RTC_STATUSA, rtc_statusa);
1299 writertc(RTC_STATUSB, RTCSB_24HR);
/* Confirm interrupts actually arrive; fall back or give up if not. */
1301 if (ioapic_enable) {
1302 error = i8254_ioapic_trial(irq, cti);
1306 kprintf("IOAPIC: mixed mode for irq %d "
1307 "trial failed: %d\n",
1311 panic("IOAPIC: mixed mode for irq %d "
1312 "trial failed: %d\n", irq, error);
1315 kprintf("IOAPIC: warning 8254 is not connected "
1316 "to the correct pin, try mixed mode\n");
1317 unregister_int(clkdesc, 0);
1318 goto mixed_mode_setup;
/* Total failure: mark no-interrupt mode and deregister this cputimer. */
1325 i8254_nointr = 1; /* don't try to register again */
1326 cputimer_intr_deregister(cti);
/* Select the RTC periodic-interrupt divider for profiling vs. stat rate. */
1330 setstatclockrate(int newhz)
1332 if (newhz == RTC_PROFRATE)
1333 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1335 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1336 writertc(RTC_STATUSA, rtc_statusa);
/* timecounter read op backed by the TSC (body elided in this capture). */
1341 tsc_get_timecount(struct timecounter *tc)
1347 #ifdef KERN_TIMESTAMP
1348 #define KERN_TIMESTAMP_SIZE 16384
/* Debug ring of raw TSC samples, exported read-only via sysctl. */
1349 static u_long tsc[KERN_TIMESTAMP_SIZE] ;
1350 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
1351 sizeof(tsc), "LU", "Kernel timestamps");
1357 tsc[i] = (u_int32_t)rdtsc();
1360 if (i >= KERN_TIMESTAMP_SIZE)
1362 tsc[i] = 0; /* mark last entry */
1364 #endif /* KERN_TIMESTAMP */
/* sysctl handler: report the i8254 count (when active) and TSC as hex. */
1371 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
/* Only sample the 8254 when it is the active system cputimer. */
1378 if (sys_cputimer == &i8254_cputimer)
1379 count = sys_cputimer->count();
1387 ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1388 return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
/*
 * Shared state for the MP TSC synchronization test: ready/done counters,
 * a start command flag, and per-cpu saved TSC samples.
 */
1391 struct tsc_mpsync_info {
1392 volatile int tsc_ready_cnt;
1393 volatile int tsc_done_cnt;
1394 volatile int tsc_command;
1395 volatile int unused01[5];
1399 } tsc_saved[MAXCPU];
/*
 * Run ~100ms of cpusync rounds checking that every cpu observes a
 * monotonically advancing target TSC value; clears arg->tsc_mpsync on
 * the first failure. Bounded by TRYMIN/TRYMAX iterations.
 */
1404 tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
1406 struct globaldata *gd = mycpu;
1407 tsc_uclock_t test_end, test_begin;
1411 kprintf("cpu%d: TSC testing MP synchronization ...\n",
1415 test_begin = rdtsc_ordered();
1416 /* Run test for 100ms */
1417 test_end = test_begin + (tsc_frequency / 10);
1419 arg->tsc_mpsync = 1;
1420 arg->tsc_target = test_begin;
1422 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */
1423 #define TSC_TEST_TRYMIN 50000
1425 for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
1426 struct lwkt_cpusync cs;
/* Rendezvous all other cpus against our freshly-read TSC target. */
1429 lwkt_cpusync_init(&cs, gd->gd_other_cpus,
1430 tsc_mpsync_test_remote, arg);
1431 lwkt_cpusync_interlock(&cs);
1433 arg->tsc_target = rdtsc_ordered();
1435 lwkt_cpusync_deinterlock(&cs);
1439 if (!arg->tsc_mpsync) {
1440 kprintf("cpu%d: TSC is not MP synchronized @%u\n",
/* Stop once the test window has elapsed and the minimum tries are done. */
1444 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
1448 #undef TSC_TEST_TRYMIN
1449 #undef TSC_TEST_TRYMAX
/* A TSC that never advanced is useless — treat as unsynchronized. */
1451 if (arg->tsc_target == test_begin) {
1452 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
1453 /* XXX disable TSC? */
1455 arg->tsc_mpsync = 0;
1459 if (arg->tsc_mpsync && bootverbose) {
1460 kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
1467 #define TSC_TEST_COUNT 50000
/*
 * Per-AP worker: signal readiness, spin until commanded, then record TSC
 * samples until done or another cpu finishes first, and bump done count.
 */
1470 tsc_mpsync_ap_thread(void *xinfo)
1472 struct tsc_mpsync_info *info = xinfo;
1477 * Tell main loop that we are ready and wait for initiation
1479 atomic_add_int(&info->tsc_ready_cnt, 1);
1480 while (info->tsc_command == 0) {
1481 lwkt_force_switch();
1485 * Run test for 10000 loops or until tsc_done_cnt != 0 (another
1486 * cpu has finished its test), then increment done.
1489 for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
1490 info->tsc_saved[cpu].v = rdtsc_ordered();
1493 atomic_add_int(&info->tsc_done_cnt, 1);
/*
 * Boot-time (SI_BOOT2_FINISH_SMP) driver that decides whether the TSC
 * may serve as the system timebase.  Stages:
 *
 *  1. Bail out immediately if the TSC is not invariant.
 *  2. Consult hw.tsc_cputimer_force and per-vendor heuristics (Intel is
 *     assumed good; AMD families 15h/16h model < 30h are suspect per
 *     the published erratum; < Bulldozer assumed bad).  A forced value
 *     of -1 disables testing entirely.
 *  3. Monotonicity test: migrate this thread across every cpu (up and
 *     back down) and verify the TSC never goes backwards, tracking the
 *     min/max inter-cpu delta.  Up to 4 attempts (VM robustness).
 *  4. Concurrency test: spawn one tsc_mpsync_ap_thread per cpu, fire
 *     them simultaneously, and judge the spread of their final TSC
 *     samples against tsc_frequency-derived thresholds.  Up to 4
 *     attempts; result may be SUCCESS / FAILURE / INDETERMINATE.
 *
 * NOTE(review): incomplete extraction — many declarations (try, cpu,
 * last, next, delta, xdelta, xworst, td), braces, break/continue
 * statements, and some kprintf argument lines are missing from this
 * chunk.  The comments below describe only what the visible lines show.
 */
1499 tsc_mpsync_test(void)
1501 enum { TSCOK, TSCNEG, TSCSPAN } error = TSCOK;
1505 if (!tsc_invariant) {
1506 /* Not even invariant TSC */
1507 kprintf("TSC is not invariant, "
1508 "no further tests will be performed\n");
1519 * Forcing can be used w/qemu to reduce contention
1521 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1523 if (tsc_mpsync == 0) {
1524 switch (cpu_vendor_id) {
1525 case CPU_VENDOR_INTEL:
1527 * Intel probably works
1531 case CPU_VENDOR_AMD:
1533 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1534 * architectures) we have to watch out for
1536 * "Processor Core Time Stamp Counters May
1538 * This Erratum is only listed for cpus in Family
1539 * 15h < Model 30h and for 16h < Model 30h.
1541 * AMD < Bulldozer probably doesn't work
1543 if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1544 CPUID_TO_FAMILY(cpu_id) == 0x16) {
1545 if (CPUID_TO_MODEL(cpu_id) < 0x30)
1547 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1553 /* probably won't work */
1556 } else if (tsc_mpsync < 0) {
1557 kprintf("TSC MP synchronization test is disabled\n");
1563 * Test even if forced to 1 above. If forced, we will use the TSC
1564 * even if the test fails. (set forced to -1 to disable entirely).
1566 kprintf("TSC testing MP synchronization ...\n");
1567 kprintf("TSC testing MP: NOTE! CPU pwrsave will inflate latencies!\n");
1570 * Test that the TSC is monotonically increasing across CPU
1571 * switches. Otherwise time will get really messed up if the
1572 * TSC is selected as the timebase.
/* --- Stage 3: cross-cpu monotonicity test (up to 4 attempts) --- */
1576 for (try = 0; tsc_frequency && try < 4; ++try) {
1580 tsc_sclock_t lo_delta = 0x7FFFFFFFFFFFFFFFLL;
1581 tsc_sclock_t hi_delta = -0x7FFFFFFFFFFFFFFFLL;
/* Sweep upward: cpu 0 .. ncpus-1, TSC must never decrease. */
1584 for (cpu = 0; cpu < ncpus; ++cpu) {
1585 lwkt_migratecpu(cpu);
1592 delta = next - last;
1594 kprintf("TSC cpu-delta NEGATIVE: "
1595 "cpu %d to %d (%ld)\n",
1596 cpu - 1, cpu, delta);
1599 if (lo_delta > delta)
1601 if (hi_delta < delta)
/* Sweep back downward: ncpus-2 .. 0. */
1606 for (cpu = ncpus - 2; cpu >= 0; --cpu) {
1607 lwkt_migratecpu(cpu);
1609 delta = next - last;
1611 kprintf("TSC cpu-delta WAS NEGATIVE! "
1612 "cpu %d to %d (%ld)\n",
1613 cpu + 1, cpu, delta);
1616 if (lo_delta > delta)
1618 if (hi_delta < delta)
/* Report observed delta range, converted from TSC ticks to ns. */
1622 kprintf("TSC cpu-delta test complete, %ldnS to %ldnS ",
1623 muldivu64(lo_delta, 1000000000, tsc_frequency),
1624 muldivu64(hi_delta, 1000000000, tsc_frequency));
1625 if (error != TSCOK) {
1626 kprintf("FAILURE\n");
1629 kprintf("SUCCESS\n");
1633 * Test TSC MP synchronization on APs.
/* --- Stage 4: concurrent per-cpu sampling test (up to 4 attempts) --- */
1637 for (try = 0; tsc_frequency && try < 4; ++try) {
1638 struct tsc_mpsync_info info;
1644 bzero(&info, sizeof(info));
/* One worker thread per cpu; inherit the coordinator's priority. */
1646 for (cpu = 0; cpu < ncpus; ++cpu) {
1648 lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1649 NULL, TDF_NOSTART, cpu,
1650 "tsc mpsync %d", cpu);
1651 lwkt_setpri_initial(td, curthread->td_pri);
1654 while (info.tsc_ready_cnt != ncpus)
1655 lwkt_force_switch();
1658 * All threads are ready, start the test and wait for
/* Release all workers at once, then wait for every one to finish. */
1661 info.tsc_command = 1;
1662 while (info.tsc_done_cnt != ncpus)
1663 lwkt_force_switch();
1668 last = info.tsc_saved[0].v;
/* Walk the saved per-cpu samples, tracking the worst inter-cpu delta. */
1671 for (cpu = 0; cpu < ncpus; ++cpu) {
1672 xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1673 last = info.tsc_saved[cpu].v;
1676 if (xworst < xdelta)
1683 * Result from attempt. Break-out if we succeeds, otherwise
1684 * try again (up to 4 times). This might be in a VM so we
1685 * need to be robust.
1687 kprintf("TSC cpu concurrency test complete, worst=%ldns, "
1689 muldivu64(xworst, 1000000000, tsc_frequency),
1690 muldivu64(delta / ncpus, 1000000000, tsc_frequency));
/* avg spread > 10ms-equivalent (freq/100) of drift: fail outright. */
1691 if (delta / ncpus > tsc_frequency / 100) {
1692 kprintf("FAILURE\n");
1694 if (delta / ncpus < tsc_frequency / 100000) {
1695 kprintf("SUCCESS\n");
1700 kprintf("INDETERMINATE\n");
1704 kprintf("TSC is MP synchronized\n");
1706 kprintf("TSC is not MP synchronized\n");
/*
 * Run the TSC MP-synchronization test once all APs are up, and export
 * the i8254/TSC state read-only via sysctl.
 *
 * NOTE(review): the description-string line for the hw.i8254.freq
 * SYSCTL_UINT appears to have been dropped by the extraction between
 * the two lines below — verify against the full source.
 */
1708 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1710 static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
1711 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
1713 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
1714 0, 0, hw_i8254_timestamp, "A", "");
1716 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
1717 &tsc_present, 0, "TSC Available");
1718 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
1719 &tsc_invariant, 0, "Invariant TSC");
1720 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
1721 &tsc_mpsync, 0, "TSC is synchronized across CPUs");
1722 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
1723 &tsc_frequency, 0, "TSC Frequency");