ee45f6f9a5881340bd7e40db0c28542a4607867c
[dragonfly.git] / sys / platform / pc64 / isa / clock.c
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      from: @(#)clock.c       7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36
37 /*
38  * Routines to handle clock hardware.
39  */
40
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47
48 #if 0
49 #include "opt_clock.h"
50 #endif
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65
66 #include <sys/thread2.h>
67
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86
87 static void i8254_restore(void);
88 static void resettodr_on_shutdown(void *arg __unused);
89
/*
 * Gregorian leap year test: every fourth year, except century years that
 * are not divisible by 400.  For the years 1901..2099 this gives the same
 * answer as a plain "divisible by 4" test, but it stays correct for wider
 * time_t ranges.  The result is exactly 0 or 1 so it can be added to a day
 * count.
 *
 * NOTE: the argument is evaluated more than once; do not pass an
 *	 expression with side effects.
 */
#define LEAPYEAR(y)						\
	((unsigned int)(y) % 4 == 0 &&				\
	 ((unsigned int)(y) % 100 != 0 || (unsigned int)(y) % 400 == 0))

/* Number of days in a non-leap year. */
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

/* Default 8254 input clock in Hz unless overridden at build time. */
#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182
#endif
100
/*
 * Mode-select bits and counter port of the 8254 channel used as the
 * free-running wall timer.  Both are assigned by
 * i8254_cputimer_construct() and consumed by i8254_cputimer_count().
 */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;

int     adjkerntz;              /* local offset from GMT in seconds */
int     disable_rtc_set;        /* disable resettodr() if != 0 */
int     tsc_present;            /* set by startrtclock() from CPUID_TSC */
int     tsc_invariant;          /* set by startrtclock() (tunable or cpuid) */
int     tsc_mpsync;
int64_t tsc_frequency;          /* measured TSC rate in Hz, 0 if unknown */
int     tsc_is_broken;
int     wall_cmos_clock;        /* wall CMOS clock assumed if != 0 */
int     timer0_running;         /* set in i8254_intr_reload(), cleared in clkintr() */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;       /* ACQUIRED once i8254_restore() has run */
enum tstate timer1_state;
enum tstate timer2_state;       /* also toggled by acquire/release_timer2() */

static  int     beeping = 0;    /* set by sysbeep(), cleared by sysbeepstop() */
static  const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
/* Values written to RTC status registers A and B by rtc_restore(). */
static  u_char  rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static  u_char  rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int     rtc_loaded;     /* set by inittodr(), checked at shutdown */

/*
 * Divisor computed by i8254_intr_config() to convert system timebase
 * ticks into 8254 ticks; 0 means i8254_intr_reload() must scale with
 * 64-bit arithmetic instead.
 */
static int i8254_cputimer_div;

/* When non-zero, i8254_restore() does not register the 8254 interrupt timer. */
static int i8254_nointr;
/* Boot-time tunable; its consumer is not visible in this part of the file. */
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

/* Boot-time tunable: when set, startrtclock() trusts the RTC calibration. */
static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
132
/* Callout used by sysbeep() to schedule sysbeepstop(). */
static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/*
 * Free-running counter backed by an 8254 channel.  The name and type are
 * overwritten by i8254_cputimer_construct() once the channel is chosen.
 */
static struct cputimer  i8254_cputimer = {
    .next               = SLIST_ENTRY_INITIALIZER,
    .name               = "i8254",
    .pri                = CPUTIMER_PRI_8254,
    .type               = 0,    /* determined later */
    .count              = i8254_cputimer_count,
    .fromhz             = cputimer_default_fromhz,
    .fromus             = cputimer_default_fromus,
    .construct          = i8254_cputimer_construct,
    .destruct           = i8254_cputimer_destruct,
    .freq               = TIMER_FREQ
};

static sysclock_t tsc_cputimer_count_mfence(void);
static sysclock_t tsc_cputimer_count_lfence(void);
static void tsc_cputimer_construct(struct cputimer *, sysclock_t);

/*
 * TSC based counter.  The count method and frequency are left unset here
 * and are filled in by code outside this part of the file.
 */
static struct cputimer  tsc_cputimer = {
    .next               = SLIST_ENTRY_INITIALIZER,
    .name               = "TSC",
    .pri                = CPUTIMER_PRI_TSC,
    .type               = CPUTIMER_TSC,
    .count              = NULL, /* determined later */
    .fromhz             = cputimer_default_fromhz,
    .fromus             = cputimer_default_fromus,
    .construct          = tsc_cputimer_construct,
    .destruct           = cputimer_default_destruct,
    .freq               = 0     /* determined later */
};

/* TSC cpucounter descriptor; frequency, count and flags are set later. */
static struct cpucounter tsc_cpucounter = {
    .freq               = 0,    /* determined later */
    .count              = NULL, /* determined later */
    .flags              = 0,    /* adjusted later */
    .prio               = CPUCOUNTER_PRIO_TSC,
    .type               = CPUCOUNTER_TSC
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/*
 * Interrupt timer backed by 8254 timer0.  Registered and selected by
 * i8254_restore() unless i8254_nointr is set; startrtclock() may adjust
 * .freq after calibration.
 */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
197
198 /*
199  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
200  * counting as of this interrupt.  We use timer1 in free-running mode (not
201  * generating any interrupts) as our main counter.  Each cpu has timeouts
202  * pending.
203  *
204  * This code is INTR_MPSAFE and may be called without the BGL held.
205  */
206 static void
207 clkintr(void *dummy, void *frame_arg)
208 {
209         static sysclock_t sysclock_count;       /* NOTE! Must be static */
210         struct globaldata *gd = mycpu;
211         struct globaldata *gscan;
212         int n;
213
214         /*
215          * SWSTROBE mode is a one-shot, the timer is no longer running
216          */
217         timer0_running = 0;
218
219         /*
220          * XXX the dispatcher needs work.  right now we call systimer_intr()
221          * directly or via IPI for any cpu with systimers queued, which is
222          * usually *ALL* of them.  We need to use the LAPIC timer for this.
223          */
224         sysclock_count = sys_cputimer->count();
225         for (n = 0; n < ncpus; ++n) {
226             gscan = globaldata_find(n);
227             if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
228                 continue;
229             if (gscan != gd) {
230                 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, 
231                                 &sysclock_count, 1);
232             } else {
233                 systimer_intr(&sysclock_count, 0, frame_arg);
234             }
235         }
236 }
237
238
239 /*
240  * NOTE! not MP safe.
241  */
242 int
243 acquire_timer2(int mode)
244 {
245         if (timer2_state != RELEASED)
246                 return (-1);
247         timer2_state = ACQUIRED;
248
249         /*
250          * This access to the timer registers is as atomic as possible
251          * because it is a single instruction.  We could do better if we
252          * knew the rate.
253          */
254         outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
255         return (0);
256 }
257
258 int
259 release_timer2(void)
260 {
261         if (timer2_state != ACQUIRED)
262                 return (-1);
263         outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
264         timer2_state = RELEASED;
265         return (0);
266 }
267
268 #include "opt_ddb.h"
269 #ifdef DDB
270 #include <ddb/ddb.h>
271
272 DB_SHOW_COMMAND(rtc, rtc)
273 {
274         kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
275                rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
276                rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
277                rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
278 }
279 #endif /* DDB */
280
281 /*
282  * Return the current cpu timer count as a 32 bit integer.
283  */
284 static
285 sysclock_t
286 i8254_cputimer_count(void)
287 {
288         static uint16_t cputimer_last;
289         uint16_t count;
290         sysclock_t ret;
291
292         clock_lock();
293         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
294         count = (uint8_t)inb(i8254_walltimer_cntr);             /* get countdown */
295         count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
296         count = -count;                                 /* -> countup */
297         if (count < cputimer_last)                      /* rollover */
298                 i8254_cputimer.base += 0x00010000;
299         ret = i8254_cputimer.base | count;
300         cputimer_last = count;
301         clock_unlock();
302         return(ret);
303 }
304
305 /*
306  * This function is called whenever the system timebase changes, allowing
307  * us to calculate what is needed to convert a system timebase tick 
308  * into an 8254 tick for the interrupt timer.  If we can convert to a
309  * simple shift, multiplication, or division, we do so.  Otherwise 64
310  * bit arithmatic is required every time the interrupt timer is reloaded.
311  */
312 static void
313 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
314 {
315     int freq;
316     int div;
317
318     /*
319      * Will a simple divide do the trick?
320      */
321     div = (timer->freq + (cti->freq / 2)) / cti->freq;
322     freq = cti->freq * div;
323
324     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
325         i8254_cputimer_div = div;
326     else
327         i8254_cputimer_div = 0;
328 }
329
330 /*
331  * Reload for the next timeout.  It is possible for the reload value
332  * to be 0 or negative, indicating that an immediate timer interrupt
333  * is desired.  For now make the minimum 2 ticks.
334  *
335  * We may have to convert from the system timebase to the 8254 timebase.
336  */
337 static void
338 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
339 {
340     uint16_t count;
341
342     if (i8254_cputimer_div)
343         reload /= i8254_cputimer_div;
344     else
345         reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
346
347     if ((int)reload < 2)
348         reload = 2;
349
350     clock_lock();
351     if (timer0_running) {
352         outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);     /* count-down timer */
353         count = (uint8_t)inb(TIMER_CNTR0);              /* lsb */
354         count |= ((uint8_t)inb(TIMER_CNTR0) << 8);      /* msb */
355         if (reload < count) {
356             outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
357             outb(TIMER_CNTR0, (uint8_t)reload);         /* lsb */
358             outb(TIMER_CNTR0, (uint8_t)(reload >> 8));  /* msb */
359         }
360     } else {
361         timer0_running = 1;
362         if (reload > 0xFFFF)
363             reload = 0;         /* full count */
364         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
365         outb(TIMER_CNTR0, (uint8_t)reload);             /* lsb */
366         outb(TIMER_CNTR0, (uint8_t)(reload >> 8));      /* msb */
367     }
368     clock_unlock();
369 }
370
371 /*
372  * DELAY(usec)       - Spin for the specified number of microseconds.
373  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
374  *                     but do a thread switch in the loop
375  *
376  * Relies on timer 1 counting down from (cputimer_freq / hz)
377  * Note: timer had better have been programmed before this is first used!
378  */
379 static void
380 DODELAY(int n, int doswitch)
381 {
382         ssysclock_t delta, ticks_left;
383         sysclock_t prev_tick, tick;
384
385 #ifdef DELAYDEBUG
386         int getit_calls = 1;
387         int n1;
388         static int state = 0;
389
390         if (state == 0) {
391                 state = 1;
392                 for (n1 = 1; n1 <= 10000000; n1 *= 10)
393                         DELAY(n1);
394                 state = 2;
395         }
396         if (state == 1)
397                 kprintf("DELAY(%d)...", n);
398 #endif
399         /*
400          * Guard against the timer being uninitialized if we are called
401          * early for console i/o.
402          */
403         if (timer0_state == RELEASED)
404                 i8254_restore();
405
406         /*
407          * Read the counter first, so that the rest of the setup overhead is
408          * counted.  Then calculate the number of hardware timer ticks
409          * required, rounding up to be sure we delay at least the requested
410          * number of microseconds.
411          */
412         prev_tick = sys_cputimer->count();
413         ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
414                      1000000;
415
416         /*
417          * Loop until done.
418          */
419         while (ticks_left > 0) {
420                 tick = sys_cputimer->count();
421 #ifdef DELAYDEBUG
422                 ++getit_calls;
423 #endif
424                 delta = tick - prev_tick;
425                 prev_tick = tick;
426                 if (delta < 0)
427                         delta = 0;
428                 ticks_left -= delta;
429                 if (doswitch && ticks_left > 0)
430                         lwkt_switch();
431                 cpu_pause();
432         }
433 #ifdef DELAYDEBUG
434         if (state == 1)
435                 kprintf(" %d calls to getit() at %d usec each\n",
436                        getit_calls, (n + 5) / getit_calls);
437 #endif
438 }
439
/*
 * DELAY(usec) - spin for the given number of microseconds without ever
 * switching threads.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}
448
449 /*
450  * Returns non-zero if the specified time period has elapsed.  Call
451  * first with last_clock set to 0.
452  */
453 int
454 CHECKTIMEOUT(TOTALDELAY *tdd)
455 {
456         sysclock_t delta;
457         int us;
458
459         if (tdd->started == 0) {
460                 if (timer0_state == RELEASED)
461                         i8254_restore();
462                 tdd->last_clock = sys_cputimer->count();
463                 tdd->started = 1;
464                 return(0);
465         }
466         delta = sys_cputimer->count() - tdd->last_clock;
467         us = (u_int64_t)delta * (u_int64_t)1000000 /
468              (u_int64_t)sys_cputimer->freq;
469         tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
470                            1000000;
471         tdd->us -= us;
472         return (tdd->us < 0);
473 }
474
475
476 /*
477  * DRIVERSLEEP() does not switch if called with a spinlock held or
478  * from a hard interrupt.
479  */
480 void
481 DRIVERSLEEP(int usec)
482 {
483         globaldata_t gd = mycpu;
484
485         if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
486                 DODELAY(usec, 0);
487         } else {
488                 DODELAY(usec, 1);
489         }
490 }
491
492 static void
493 sysbeepstop(void *chan)
494 {
495         outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */
496         beeping = 0;
497         release_timer2();
498 }
499
500 int
501 sysbeep(int pitch, int period)
502 {
503         if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
504                 return(-1);
505         if (sysbeep_enable == 0)
506                 return(-1);
507         /*
508          * Nobody else is using timer2, we do not need the clock lock
509          */
510         outb(TIMER_CNTR2, pitch);
511         outb(TIMER_CNTR2, (pitch>>8));
512         if (!beeping) {
513                 /* enable counter2 output to speaker */
514                 outb(IO_PPI, inb(IO_PPI) | 3);
515                 beeping = period;
516                 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
517         }
518         return (0);
519 }
520
521 /*
522  * RTC support routines
523  */
524
525 int
526 rtcin(int reg)
527 {
528         u_char val;
529
530         crit_enter();
531         outb(IO_RTC, reg);
532         inb(0x84);
533         val = inb(IO_RTC + 1);
534         inb(0x84);
535         crit_exit();
536         return (val);
537 }
538
539 static __inline void
540 writertc(u_char reg, u_char val)
541 {
542         crit_enter();
543         inb(0x84);
544         outb(IO_RTC, reg);
545         inb(0x84);
546         outb(IO_RTC + 1, val);
547         inb(0x84);              /* XXX work around wrong order in rtcin() */
548         crit_exit();
549 }
550
/*
 * Read an RTC register and convert it from BCD to binary.
 */
static __inline int
readrtc(int port)
{
	int bcd = rtcin(port);

	return (bcd2bin(bcd));
}
556
557 static u_int
558 calibrate_clocks(void)
559 {
560         u_int64_t old_tsc;
561         u_int tot_count;
562         sysclock_t count, prev_count;
563         int sec, start_sec, timeout;
564
565         if (bootverbose)
566                 kprintf("Calibrating clock(s) ...\n");
567         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
568                 goto fail;
569         timeout = 100000000;
570
571         /* Read the mc146818A seconds counter. */
572         for (;;) {
573                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
574                         sec = rtcin(RTC_SEC);
575                         break;
576                 }
577                 if (--timeout == 0)
578                         goto fail;
579         }
580
581         /* Wait for the mC146818A seconds counter to change. */
582         start_sec = sec;
583         for (;;) {
584                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
585                         sec = rtcin(RTC_SEC);
586                         if (sec != start_sec)
587                                 break;
588                 }
589                 if (--timeout == 0)
590                         goto fail;
591         }
592
593         /* Start keeping track of the i8254 counter. */
594         prev_count = sys_cputimer->count();
595         tot_count = 0;
596
597         if (tsc_present) 
598                 old_tsc = rdtsc();
599         else
600                 old_tsc = 0;            /* shut up gcc */
601
602         /*
603          * Wait for the mc146818A seconds counter to change.  Read the i8254
604          * counter for each iteration since this is convenient and only
605          * costs a few usec of inaccuracy. The timing of the final reads
606          * of the counters almost matches the timing of the initial reads,
607          * so the main cause of inaccuracy is the varying latency from 
608          * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
609          * rtcin(RTC_SEC) that returns a changed seconds count.  The
610          * maximum inaccuracy from this cause is < 10 usec on 486's.
611          */
612         start_sec = sec;
613         for (;;) {
614                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
615                         sec = rtcin(RTC_SEC);
616                 count = sys_cputimer->count();
617                 tot_count += (int)(count - prev_count);
618                 prev_count = count;
619                 if (sec != start_sec)
620                         break;
621                 if (--timeout == 0)
622                         goto fail;
623         }
624
625         /*
626          * Read the cpu cycle counter.  The timing considerations are
627          * similar to those for the i8254 clock.
628          */
629         if (tsc_present) {
630                 tsc_frequency = rdtsc() - old_tsc;
631                 if (bootverbose) {
632                         kprintf("TSC clock: %jd Hz (Method A)\n",
633                             (intmax_t)tsc_frequency);
634                 }
635         }
636
637         kprintf("i8254 clock: %u Hz\n", tot_count);
638         return (tot_count);
639
640 fail:
641         kprintf("failed, using default i8254 clock of %u Hz\n",
642                 i8254_cputimer.freq);
643         return (i8254_cputimer.freq);
644 }
645
646 static void
647 i8254_restore(void)
648 {
649         timer0_state = ACQUIRED;
650
651         clock_lock();
652
653         /*
654          * Timer0 is our fine-grained variable clock interrupt
655          */
656         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
657         outb(TIMER_CNTR0, 2);   /* lsb */
658         outb(TIMER_CNTR0, 0);   /* msb */
659         clock_unlock();
660
661         if (!i8254_nointr) {
662                 cputimer_intr_register(&i8254_cputimer_intr);
663                 cputimer_intr_select(&i8254_cputimer_intr, 0);
664         }
665
666         /*
667          * Timer1 or timer2 is our free-running clock, but only if another
668          * has not been selected.
669          */
670         cputimer_register(&i8254_cputimer);
671         cputimer_select(&i8254_cputimer, 0);
672 }
673
674 static void
675 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
676 {
677         int which;
678
679         /*
680          * Should we use timer 1 or timer 2 ?
681          */
682         which = 0;
683         TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
684         if (which != 1 && which != 2)
685                 which = 2;
686
687         switch(which) {
688         case 1:
689                 timer->name = "i8254_timer1";
690                 timer->type = CPUTIMER_8254_SEL1;
691                 i8254_walltimer_sel = TIMER_SEL1;
692                 i8254_walltimer_cntr = TIMER_CNTR1;
693                 timer1_state = ACQUIRED;
694                 break;
695         case 2:
696                 timer->name = "i8254_timer2";
697                 timer->type = CPUTIMER_8254_SEL2;
698                 i8254_walltimer_sel = TIMER_SEL2;
699                 i8254_walltimer_cntr = TIMER_CNTR2;
700                 timer2_state = ACQUIRED;
701                 break;
702         }
703
704         timer->base = (oldclock + 0xFFFF) & ~0xFFFF;
705
706         clock_lock();
707         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
708         outb(i8254_walltimer_cntr, 0);  /* lsb */
709         outb(i8254_walltimer_cntr, 0);  /* msb */
710         outb(IO_PPI, inb(IO_PPI) | 1);  /* bit 0: enable gate, bit 1: spkr */
711         clock_unlock();
712 }
713
714 static void
715 i8254_cputimer_destruct(struct cputimer *timer)
716 {
717         switch(timer->type) {
718         case CPUTIMER_8254_SEL1:
719             timer1_state = RELEASED;
720             break;
721         case CPUTIMER_8254_SEL2:
722             timer2_state = RELEASED;
723             break;
724         default:
725             break;
726         }
727         timer->type = 0;
728 }
729
730 static void
731 rtc_restore(void)
732 {
733         /* Restore all of the RTC's "status" (actually, control) registers. */
734         writertc(RTC_STATUSB, RTCSB_24HR);
735         writertc(RTC_STATUSA, rtc_statusa);
736         writertc(RTC_STATUSB, rtc_statusb);
737 }
738
/*
 * Restore all the timers.
 *
 * Called to resynchronize core timekeeping after a long halt, e.g. from
 * apm_default_resume() and friends, and also when munging of the 8254 by
 * a BIOS call has been detected.  Without this the cputimer delta may
 * have grown too large for nanouptime() and friends to handle, or (in
 * the 8254 munging case) the SYSTIMER code might trigger prematurely.
 */
void
timer_restore(void)
{
	crit_enter();
	i8254_restore();	/* reprogram and re-register the 8254 timers */
	rtc_restore();		/* rewrite the RTC control registers */
	crit_exit();
}
757
758 /*
759  * Initialize 8254 timer 0 early so that it can be used in DELAY().
760  */
761 void
762 startrtclock(void)
763 {
764         u_int delta, freq;
765
766         /* 
767          * Can we use the TSC?
768          *
769          * NOTE: If running under qemu, probably a good idea to force the
770          *       TSC because we are not likely to detect it as being
771          *       invariant or mpsyncd if you don't.  This will greatly
772          *       reduce SMP contention.
773          */
774         if (cpu_feature & CPUID_TSC) {
775                 tsc_present = 1;
776                 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
777
778                 if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
779                      cpu_vendor_id == CPU_VENDOR_AMD) &&
780                     cpu_exthigh >= 0x80000007) {
781                         u_int regs[4];
782
783                         do_cpuid(0x80000007, regs);
784                         if (regs[3] & 0x100)
785                                 tsc_invariant = 1;
786                 }
787         } else {
788                 tsc_present = 0;
789         }
790
791         /*
792          * Initial RTC state, don't do anything unexpected
793          */
794         writertc(RTC_STATUSA, rtc_statusa);
795         writertc(RTC_STATUSB, RTCSB_24HR);
796
797         /*
798          * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 
799          * generate an interrupt, which we will ignore for now.
800          *
801          * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
802          * (so it counts a full 2^16 and repeats).  We will use this timer
803          * for our counting.
804          */
805         i8254_restore();
806
807         /*
808          * When booting without verbose messages, it's pointless to run the
809          * calibrate_clocks() calibration code, when we don't use the
810          * results in any way. With bootverbose, we are at least printing
811          *  this information to the kernel log.
812          */
813         if (calibrate_timers_with_rtc == 0 && !bootverbose)
814                 goto skip_rtc_based;
815
816         freq = calibrate_clocks();
817 #ifdef CLK_CALIBRATION_LOOP
818         if (bootverbose) {
819                 int c;
820
821                 cnpoll(TRUE);
822                 kprintf("Press a key on the console to "
823                         "abort clock calibration\n");
824                 while ((c = cncheckc()) == -1 || c == NOKEY)
825                         calibrate_clocks();
826                 cnpoll(FALSE);
827         }
828 #endif
829
830         /*
831          * Use the calibrated i8254 frequency if it seems reasonable.
832          * Otherwise use the default, and don't use the calibrated i586
833          * frequency.
834          */
835         delta = freq > i8254_cputimer.freq ? 
836                         freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
837         if (delta < i8254_cputimer.freq / 100) {
838                 if (calibrate_timers_with_rtc == 0) {
839                         kprintf(
840 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
841                         freq = i8254_cputimer.freq;
842                 }
843                 /*
844                  * NOTE:
845                  * Interrupt timer's freq must be adjusted
846                  * before we change the cuptimer's frequency.
847                  */
848                 i8254_cputimer_intr.freq = freq;
849                 cputimer_set_frequency(&i8254_cputimer, freq);
850         } else {
851                 if (bootverbose)
852                         kprintf(
853                     "%d Hz differs from default of %d Hz by more than 1%%\n",
854                                freq, i8254_cputimer.freq);
855                 tsc_frequency = 0;
856         }
857
858         if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
859                 kprintf(
860 "hw.calibrate_timers_with_rtc not set - using old calibration method\n");
861                 tsc_frequency = 0;
862         }
863
864 skip_rtc_based:
865         if (tsc_present && tsc_frequency == 0) {
866                 /*
867                  * Calibration of the i586 clock relative to the mc146818A
868                  * clock failed.  Do a less accurate calibration relative
869                  * to the i8254 clock.
870                  */
871                 u_int64_t old_tsc = rdtsc();
872
873                 DELAY(1000000);
874                 tsc_frequency = rdtsc() - old_tsc;
875                 if (bootverbose && calibrate_timers_with_rtc) {
876                         kprintf("TSC clock: %jd Hz (Method B)\n",
877                             (intmax_t)tsc_frequency);
878                 }
879         }
880
881         if (tsc_present) {
882                 kprintf("TSC%s clock: %jd Hz\n",
883                     tsc_invariant ? " invariant" : "",
884                     (intmax_t)tsc_frequency);
885         }
886
887         EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST);
888 }
889
890 /*
891  * Sync the time of day back to the RTC on shutdown, but only if
892  * we have already loaded it and have not crashed.
893  */
894 static void
895 resettodr_on_shutdown(void *arg __unused)
896 {
897         if (rtc_loaded && panicstr == NULL) {
898                 resettodr();
899         }
900 }
901
902 /*
903  * Initialize the time of day register, based on the time base which is, e.g.
904  * from a filesystem.
905  */
906 void
907 inittodr(time_t base)
908 {
909         unsigned long   sec, days;
910         int             year, month;
911         int             y, m;
912         struct timespec ts;
913
914         if (base) {
915                 ts.tv_sec = base;
916                 ts.tv_nsec = 0;
917                 set_timeofday(&ts);
918         }
919
920         /* Look if we have a RTC present and the time is valid */
921         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
922                 goto wrong_time;
923
924         /* wait for time update to complete */
925         /* If RTCSA_TUP is zero, we have at least 244us before next update */
926         crit_enter();
927         while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
928                 crit_exit();
929                 crit_enter();
930         }
931
932         days = 0;
933 #ifdef USE_RTC_CENTURY
934         year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
935 #else
936         year = readrtc(RTC_YEAR) + 1900;
937         if (year < 1970)
938                 year += 100;
939 #endif
940         if (year < 1970) {
941                 crit_exit();
942                 goto wrong_time;
943         }
944         month = readrtc(RTC_MONTH);
945         for (m = 1; m < month; m++)
946                 days += daysinmonth[m-1];
947         if ((month > 2) && LEAPYEAR(year))
948                 days ++;
949         days += readrtc(RTC_DAY) - 1;
950         for (y = 1970; y < year; y++)
951                 days += DAYSPERYEAR + LEAPYEAR(y);
952         sec = ((( days * 24 +
953                   readrtc(RTC_HRS)) * 60 +
954                   readrtc(RTC_MIN)) * 60 +
955                   readrtc(RTC_SEC));
956         /* sec now contains the number of seconds, since Jan 1 1970,
957            in the local time zone */
958
959         sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
960
961         y = (int)(time_second - sec);
962         if (y <= -2 || y >= 2) {
963                 /* badly off, adjust it */
964                 ts.tv_sec = sec;
965                 ts.tv_nsec = 0;
966                 set_timeofday(&ts);
967         }
968         rtc_loaded = 1;
969         crit_exit();
970         return;
971
972 wrong_time:
973         kprintf("Invalid time in real time clock.\n");
974         kprintf("Check and reset the date immediately!\n");
975 }
976
977 /*
978  * Write system time back to RTC
979  */
980 void
981 resettodr(void)
982 {
983         struct timeval tv;
984         unsigned long tm;
985         int m;
986         int y;
987
988         if (disable_rtc_set)
989                 return;
990
991         microtime(&tv);
992         tm = tv.tv_sec;
993
994         crit_enter();
995         /* Disable RTC updates and interrupts. */
996         writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
997
998         /* Calculate local time to put in RTC */
999
1000         tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1001
1002         writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;    /* Write back Seconds */
1003         writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;    /* Write back Minutes */
1004         writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;    /* Write back Hours   */
1005
1006         /* We have now the days since 01-01-1970 in tm */
1007         writertc(RTC_WDAY, (tm+4)%7);                   /* Write back Weekday */
1008         for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1009              tm >= m;
1010              y++,      m = DAYSPERYEAR + LEAPYEAR(y))
1011              tm -= m;
1012
1013         /* Now we have the years in y and the day-of-the-year in tm */
1014         writertc(RTC_YEAR, bin2bcd(y%100));             /* Write back Year    */
1015 #ifdef USE_RTC_CENTURY
1016         writertc(RTC_CENTURY, bin2bcd(y/100));          /* ... and Century    */
1017 #endif
1018         for (m = 0; ; m++) {
1019                 int ml;
1020
1021                 ml = daysinmonth[m];
1022                 if (m == 1 && LEAPYEAR(y))
1023                         ml++;
1024                 if (tm < ml)
1025                         break;
1026                 tm -= ml;
1027         }
1028
1029         writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1030         writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1031
1032         /* Reenable RTC updates and interrupts. */
1033         writertc(RTC_STATUSB, rtc_statusb);
1034         crit_exit();
1035 }
1036
1037 static int
1038 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1039 {
1040         sysclock_t base;
1041         long lastcnt;
1042
1043         /*
1044          * Following code assumes the 8254 is the cpu timer,
1045          * so make sure it is.
1046          */
1047         KKASSERT(sys_cputimer == &i8254_cputimer);
1048         KKASSERT(cti == &i8254_cputimer_intr);
1049
1050         lastcnt = get_interrupt_counter(irq, mycpuid);
1051
1052         /*
1053          * Force an 8254 Timer0 interrupt and wait 1/100s for
1054          * it to happen, then see if we got it.
1055          */
1056         kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1057
1058         i8254_intr_reload(cti, 2);
1059         base = sys_cputimer->count();
1060         while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1061                 ; /* nothing */
1062
1063         if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1064                 return ENOENT;
1065         return 0;
1066 }
1067
1068 /*
1069  * Start both clocks running.  DragonFly note: the stat clock is no longer
1070  * used.  Instead, 8254 based systimers are used for all major clock
1071  * interrupts.
1072  */
1073 static void
1074 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1075 {
1076         void *clkdesc = NULL;
1077         int irq = 0, mixed_mode = 0, error;
1078
1079         KKASSERT(mycpuid == 0);
1080         callout_init_mp(&sysbeepstop_ch);
1081
1082         if (!selected && i8254_intr_disable)
1083                 goto nointr;
1084
1085         /*
1086          * The stat interrupt mask is different without the
1087          * statistics clock.  Also, don't set the interrupt
1088          * flag which would normally cause the RTC to generate
1089          * interrupts.
1090          */
1091         rtc_statusb = RTCSB_24HR;
1092
1093         /* Finish initializing 8254 timer 0. */
1094         if (ioapic_enable) {
1095                 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1096                         INTR_POLARITY_HIGH);
1097                 if (irq < 0) {
1098 mixed_mode_setup:
1099                         error = ioapic_conf_legacy_extint(0);
1100                         if (!error) {
1101                                 irq = machintr_legacy_intr_find(0,
1102                                     INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1103                                 if (irq < 0)
1104                                         error = ENOENT;
1105                         }
1106
1107                         if (error) {
1108                                 if (!selected) {
1109                                         kprintf("IOAPIC: setup mixed mode for "
1110                                                 "irq 0 failed: %d\n", error);
1111                                         goto nointr;
1112                                 } else {
1113                                         panic("IOAPIC: setup mixed mode for "
1114                                               "irq 0 failed: %d\n", error);
1115                                 }
1116                         }
1117                         mixed_mode = 1;
1118                 }
1119                 clkdesc = register_int(irq, clkintr, NULL, "clk",
1120                                        NULL,
1121                                        INTR_EXCL | INTR_CLOCK |
1122                                        INTR_NOPOLL | INTR_MPSAFE |
1123                                        INTR_NOENTROPY, 0);
1124         } else {
1125                 register_int(0, clkintr, NULL, "clk", NULL,
1126                              INTR_EXCL | INTR_CLOCK |
1127                              INTR_NOPOLL | INTR_MPSAFE |
1128                              INTR_NOENTROPY, 0);
1129         }
1130
1131         /* Initialize RTC. */
1132         writertc(RTC_STATUSA, rtc_statusa);
1133         writertc(RTC_STATUSB, RTCSB_24HR);
1134
1135         if (ioapic_enable) {
1136                 error = i8254_ioapic_trial(irq, cti);
1137                 if (error) {
1138                         if (mixed_mode) {
1139                                 if (!selected) {
1140                                         kprintf("IOAPIC: mixed mode for irq %d "
1141                                                 "trial failed: %d\n",
1142                                                 irq, error);
1143                                         goto nointr;
1144                                 } else {
1145                                         panic("IOAPIC: mixed mode for irq %d "
1146                                               "trial failed: %d\n", irq, error);
1147                                 }
1148                         } else {
1149                                 kprintf("IOAPIC: warning 8254 is not connected "
1150                                         "to the correct pin, try mixed mode\n");
1151                                 unregister_int(clkdesc, 0);
1152                                 goto mixed_mode_setup;
1153                         }
1154                 }
1155         }
1156         return;
1157
1158 nointr:
1159         i8254_nointr = 1; /* don't try to register again */
1160         cputimer_intr_deregister(cti);
1161 }
1162
1163 void
1164 setstatclockrate(int newhz)
1165 {
1166         if (newhz == RTC_PROFRATE)
1167                 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1168         else
1169                 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1170         writertc(RTC_STATUSA, rtc_statusa);
1171 }
1172
#if 0
/* Compiled out: timecounter read method returning the TSC as an unsigned. */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	unsigned now = rdtsc();

	return (now);
}
#endif
1180
#ifdef KERN_TIMESTAMP
/*
 * Kernel timestamp ring, exported read-only as debug.timestamp.  Entries
 * are stored as (low 32 bits of the TSC, caller supplied tag) pairs; the
 * slot following the newest pair is zeroed to mark the end.
 */
#define KERN_TIMESTAMP_SIZE 16384
static u_long tsc[KERN_TIMESTAMP_SIZE];
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");

/*
 * Record one timestamp tagged with 'x', wrapping to the start of the
 * ring once the end is reached.
 */
void
_TSTMP(u_int32_t x)
{
	static int next;	/* index of the next free pair */

	tsc[next++] = (u_int32_t)rdtsc();
	tsc[next++] = x;
	if (next >= KERN_TIMESTAMP_SIZE)
		next = 0;
	tsc[next] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1199
1200 /*
1201  *
1202  */
1203
1204 static int
1205 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1206 {
1207     sysclock_t count;
1208     uint64_t tscval;
1209     char buf[32];
1210
1211     crit_enter();
1212     if (sys_cputimer == &i8254_cputimer)
1213         count = sys_cputimer->count();
1214     else
1215         count = 0;
1216     if (tsc_present)
1217         tscval = rdtsc();
1218     else
1219         tscval = 0;
1220     crit_exit();
1221     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1222     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1223 }
1224
/*
 * State of one TSC MP synchronization test run; handed to
 * tsc_mpsync_test_remote() on the other cpus.
 */
struct tsc_mpsync_arg {
	/* TSC reading published by the testing cpu */
	volatile uint64_t	tsc_target;
	/* set to 1 at test start, cleared when a remote cpu reads less */
	volatile int		tsc_mpsync;
};
1229
/*
 * Counters shared between tsc_mpsync_test() and the per-cpu
 * tsc_mpsync_ap_thread() test threads.
 */
struct tsc_mpsync_thr {
	/* number of cpus that have finished their test */
	volatile int		tsc_done_cnt;
	/* number of cpus whose test passed */
	volatile int		tsc_mpsync_cnt;
};
1234
1235 static void
1236 tsc_mpsync_test_remote(void *xarg)
1237 {
1238         struct tsc_mpsync_arg *arg = xarg;
1239         uint64_t tsc;
1240
1241         tsc = rdtsc_ordered();
1242         if (tsc < arg->tsc_target)
1243                 arg->tsc_mpsync = 0;
1244 }
1245
1246 static void
1247 tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
1248 {
1249         struct globaldata *gd = mycpu;
1250         uint64_t test_end, test_begin;
1251         u_int i;
1252
1253         if (bootverbose) {
1254                 kprintf("cpu%d: TSC testing MP synchronization ...\n",
1255                     gd->gd_cpuid);
1256         }
1257
1258         test_begin = rdtsc_ordered();
1259         /* Run test for 100ms */
1260         test_end = test_begin + (tsc_frequency / 10);
1261
1262         arg->tsc_mpsync = 1;
1263         arg->tsc_target = test_begin;
1264
1265 #define TSC_TEST_TRYMAX         1000000 /* Make sure we could stop */
1266 #define TSC_TEST_TRYMIN         50000
1267
1268         for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
1269                 struct lwkt_cpusync cs;
1270
1271                 crit_enter();
1272                 lwkt_cpusync_init(&cs, gd->gd_other_cpus,
1273                     tsc_mpsync_test_remote, arg);
1274                 lwkt_cpusync_interlock(&cs);
1275                 arg->tsc_target = rdtsc_ordered();
1276                 cpu_mfence();
1277                 lwkt_cpusync_deinterlock(&cs);
1278                 crit_exit();
1279
1280                 if (!arg->tsc_mpsync) {
1281                         kprintf("cpu%d: TSC is not MP synchronized @%u\n",
1282                             gd->gd_cpuid, i);
1283                         break;
1284                 }
1285                 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
1286                         break;
1287         }
1288
1289 #undef TSC_TEST_TRYMIN
1290 #undef TSC_TEST_TRYMAX
1291
1292         if (arg->tsc_target == test_begin) {
1293                 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
1294                 /* XXX disable TSC? */
1295                 tsc_invariant = 0;
1296                 arg->tsc_mpsync = 0;
1297                 return;
1298         }
1299
1300         if (arg->tsc_mpsync && bootverbose) {
1301                 kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
1302                     gd->gd_cpuid, i);
1303         }
1304 }
1305
1306 static void
1307 tsc_mpsync_ap_thread(void *xthr)
1308 {
1309         struct tsc_mpsync_thr *thr = xthr;
1310         struct tsc_mpsync_arg arg;
1311
1312         tsc_mpsync_test_loop(&arg);
1313         if (arg.tsc_mpsync) {
1314                 atomic_add_int(&thr->tsc_mpsync_cnt, 1);
1315                 cpu_sfence();
1316         }
1317         atomic_add_int(&thr->tsc_done_cnt, 1);
1318
1319         lwkt_exit();
1320 }
1321
1322 static void
1323 tsc_mpsync_test(void)
1324 {
1325         struct tsc_mpsync_arg arg;
1326
1327         if (!tsc_invariant) {
1328                 /* Not even invariant TSC */
1329                 return;
1330         }
1331
1332         if (ncpus == 1) {
1333                 /* Only one CPU */
1334                 tsc_mpsync = 1;
1335                 return;
1336         }
1337
1338         /*
1339          * Forcing can be used w/qemu to reduce contention
1340          */
1341         TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1342
1343         if (tsc_mpsync == 0) {
1344                 switch(cpu_vendor_id) {
1345                 case CPU_VENDOR_INTEL:
1346                         /*
1347                          * Intel probably works
1348                          */
1349                         break;
1350                 case CPU_VENDOR_AMD:
1351                         /*
1352                          * AMD < Ryzen probably doesn't work
1353                          */
1354                         if (CPUID_TO_FAMILY(cpu_id) < 0x17)
1355                                 return;
1356                         break;
1357                 default:
1358                         /* probably won't work */
1359                         return;
1360                 }
1361         }
1362
1363         /*
1364          * Test even if forced above.  If forced, we will use the TSC
1365          * even if the test fails.
1366          */
1367         kprintf("TSC testing MP synchronization ...\n");
1368
1369         tsc_mpsync_test_loop(&arg);
1370         if (arg.tsc_mpsync) {
1371                 struct tsc_mpsync_thr thr;
1372                 int cpu;
1373
1374                 /*
1375                  * Test TSC MP synchronization on APs.
1376                  */
1377
1378                 thr.tsc_done_cnt = 1;
1379                 thr.tsc_mpsync_cnt = 1;
1380
1381                 for (cpu = 0; cpu < ncpus; ++cpu) {
1382                         if (cpu == mycpuid)
1383                                 continue;
1384
1385                         lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
1386                             NULL, 0, cpu, "tsc mpsync %d", cpu);
1387                 }
1388
1389                 while (thr.tsc_done_cnt != ncpus) {
1390                         cpu_pause();
1391                         cpu_lfence();
1392                 }
1393                 if (thr.tsc_mpsync_cnt == ncpus)
1394                         tsc_mpsync = 1;
1395         }
1396
1397         if (tsc_mpsync)
1398                 kprintf("TSC is MP synchronized\n");
1399         else
1400                 kprintf("TSC is not MP synchronized\n");
1401 }
1402 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1403
/*
 * Highest frequency allowed for the TSC based cputimer.  The TSC frequency
 * is halved until it no longer exceeds this limit.
 */
#define TSC_CPUTIMER_FREQMAX	(128 * 1000 * 1000)	/* 128Mhz */

/* Right shift applied to raw TSC readings by tsc_cputimer_count(). */
static int tsc_cputimer_shift;
1407
1408 static void
1409 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1410 {
1411         timer->base = 0;
1412         timer->base = oldclock - timer->count();
1413 }
1414
1415 static __inline sysclock_t
1416 tsc_cputimer_count(void)
1417 {
1418         uint64_t tsc;
1419
1420         tsc = rdtsc();
1421         tsc >>= tsc_cputimer_shift;
1422
1423         return (tsc + tsc_cputimer.base);
1424 }
1425
1426 static sysclock_t
1427 tsc_cputimer_count_lfence(void)
1428 {
1429         cpu_lfence();
1430         return tsc_cputimer_count();
1431 }
1432
1433 static sysclock_t
1434 tsc_cputimer_count_mfence(void)
1435 {
1436         cpu_mfence();
1437         return tsc_cputimer_count();
1438 }
1439
/*
 * cpucounter count method: raw TSC read after cpu_lfence(); installed by
 * tsc_cputimer_register() on Intel cpus.
 */
static uint64_t
tsc_cpucounter_count_lfence(void)
{
	uint64_t now;

	cpu_lfence();
	now = rdtsc();
	return (now);
}
1447
/*
 * cpucounter count method: raw TSC read after cpu_mfence(); installed by
 * tsc_cputimer_register() on non-Intel cpus.
 */
static uint64_t
tsc_cpucounter_count_mfence(void)
{
	uint64_t now;

	cpu_mfence();
	now = rdtsc();
	return (now);
}
1455
1456 static void
1457 tsc_cputimer_register(void)
1458 {
1459         uint64_t freq;
1460         int enable = 1;
1461
1462         if (!tsc_mpsync) {
1463                 if (tsc_invariant) {
1464                         /* Per-cpu cpucounter still works. */
1465                         goto regcnt;
1466                 }
1467                 return;
1468         }
1469
1470         TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
1471         if (!enable)
1472                 return;
1473
1474         freq = tsc_frequency;
1475         while (freq > TSC_CPUTIMER_FREQMAX) {
1476                 freq >>= 1;
1477                 ++tsc_cputimer_shift;
1478         }
1479         kprintf("TSC: cputimer freq %ju, shift %d\n",
1480             (uintmax_t)freq, tsc_cputimer_shift);
1481
1482         tsc_cputimer.freq = freq;
1483
1484         if (cpu_vendor_id == CPU_VENDOR_INTEL)
1485                 tsc_cputimer.count = tsc_cputimer_count_lfence;
1486         else
1487                 tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
1488
1489         cputimer_register(&tsc_cputimer);
1490         cputimer_select(&tsc_cputimer, 0);
1491
1492         tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
1493 regcnt:
1494         tsc_cpucounter.freq = tsc_frequency;
1495         if (cpu_vendor_id == CPU_VENDOR_INTEL) {
1496                 tsc_cpucounter.count =
1497                     tsc_cpucounter_count_lfence;
1498         } else {
1499                 tsc_cpucounter.count =
1500                     tsc_cpucounter_count_mfence; /* safe bet */
1501         }
1502         cpucounter_register(&tsc_cpucounter);
1503 }
1504 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
1505         tsc_cputimer_register, NULL);
1506
1507 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
1508 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
1509             "frequency");
1510 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
1511             0, 0, hw_i8254_timestamp, "A", "");
1512
1513 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
1514             &tsc_present, 0, "TSC Available");
1515 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
1516             &tsc_invariant, 0, "Invariant TSC");
1517 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
1518             &tsc_mpsync, 0, "TSC is synchronized across CPUs");
1519 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
1520             &tsc_frequency, 0, "TSC Frequency");