kernel - Refactor smp collision statistics
[dragonfly.git] / sys / platform / pc64 / isa / clock.c
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      from: @(#)clock.c       7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36
37 /*
38  * Routines to handle clock hardware.
39  */
40
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47
48 #if 0
49 #include "opt_clock.h"
50 #endif
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65
66 #include <sys/thread2.h>
67
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86
87 static void i8254_restore(void);
88 static void resettodr_on_shutdown(void *arg __unused);
89
/*
 * Gregorian leap year test: every fourth year, except century years that
 * are not divisible by 400.  For 1901..2099 this gives exactly the same
 * answer as the simple "divisible by 4" test, but it stays correct when a
 * four-digit year beyond that range is read (e.g. via the RTC century
 * register) on a platform whose time_t is wider than 32 bits.
 *
 * The result is always 0 or 1, so it may be added to a day count.
 * NOTE: the argument is evaluated more than once; pass a plain value.
 */
#define LEAPYEAR(y)	((unsigned int)(y) % 4 == 0 && \
			 ((unsigned int)(y) % 100 != 0 || \
			  (unsigned int)(y) % 400 == 0))

/* Number of days in a non-leap year. */
#define DAYSPERYEAR	(31+28+31+30+31+30+31+31+30+31+30+31)

/* Default i8254 input clock in Hz, unless overridden at build time. */
#ifndef TIMER_FREQ
#define TIMER_FREQ	1193182
#endif
100
101 static uint8_t i8254_walltimer_sel;
102 static uint16_t i8254_walltimer_cntr;
103
104 int     adjkerntz;              /* local offset from GMT in seconds */
105 int     disable_rtc_set;        /* disable resettodr() if != 0 */
106 int     tsc_present;
107 int     tsc_invariant;
108 int     tsc_mpsync;
109 int     tsc_is_broken;
110 int     wall_cmos_clock;        /* wall CMOS clock assumed if != 0 */
111 int     timer0_running;
112 tsc_uclock_t tsc_frequency;
113 tsc_uclock_t tsc_oneus_approx;  /* always at least 1, approx only */
114
115 enum tstate { RELEASED, ACQUIRED };
116 enum tstate timer0_state;
117 enum tstate timer1_state;
118 enum tstate timer2_state;
119
120 static  int     beeping = 0;
121 static  const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
122 static  u_char  rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
123 static  u_char  rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
124 static  int     rtc_loaded;
125
126 static int i8254_cputimer_div;
127
128 static int i8254_nointr;
129 static int i8254_intr_disable = 1;
130 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
131
132 static int calibrate_timers_with_rtc = 0;
133 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
134
135 static struct callout sysbeepstop_ch;
136
137 static sysclock_t i8254_cputimer_count(void);
138 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
139 static void i8254_cputimer_destruct(struct cputimer *cputimer);
140
141 static struct cputimer  i8254_cputimer = {
142     .next               = SLIST_ENTRY_INITIALIZER,
143     .name               = "i8254",
144     .pri                = CPUTIMER_PRI_8254,
145     .type               = 0,    /* determined later */
146     .count              = i8254_cputimer_count,
147     .fromhz             = cputimer_default_fromhz,
148     .fromus             = cputimer_default_fromus,
149     .construct          = i8254_cputimer_construct,
150     .destruct           = i8254_cputimer_destruct,
151     .freq               = TIMER_FREQ
152 };
153
154 static sysclock_t tsc_cputimer_count_mfence(void);
155 static sysclock_t tsc_cputimer_count_lfence(void);
156 static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
157
158 static struct cputimer  tsc_cputimer = {
159     .next               = SLIST_ENTRY_INITIALIZER,
160     .name               = "TSC",
161     .pri                = CPUTIMER_PRI_TSC,
162     .type               = CPUTIMER_TSC,
163     .count              = NULL, /* determined later */
164     .fromhz             = cputimer_default_fromhz,
165     .fromus             = cputimer_default_fromus,
166     .construct          = tsc_cputimer_construct,
167     .destruct           = cputimer_default_destruct,
168     .freq               = 0     /* determined later */
169 };
170
171 static struct cpucounter tsc_cpucounter = {
172     .freq               = 0,    /* determined later */
173     .count              = NULL, /* determined later */
174     .flags              = 0,    /* adjusted later */
175     .prio               = CPUCOUNTER_PRIO_TSC,
176     .type               = CPUCOUNTER_TSC
177 };
178
179 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
180 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
181 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
182
183 static struct cputimer_intr i8254_cputimer_intr = {
184     .freq = TIMER_FREQ,
185     .reload = i8254_intr_reload,
186     .enable = cputimer_intr_default_enable,
187     .config = i8254_intr_config,
188     .restart = cputimer_intr_default_restart,
189     .pmfixup = cputimer_intr_default_pmfixup,
190     .initclock = i8254_intr_initclock,
191     .pcpuhand = NULL,
192     .next = SLIST_ENTRY_INITIALIZER,
193     .name = "i8254",
194     .type = CPUTIMER_INTR_8254,
195     .prio = CPUTIMER_INTR_PRIO_8254,
196     .caps = CPUTIMER_INTR_CAP_PS,
197     .priv = NULL
198 };
199
200 /*
201  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
202  * counting as of this interrupt.  We use timer1 in free-running mode (not
203  * generating any interrupts) as our main counter.  Each cpu has timeouts
204  * pending.
205  *
206  * This code is INTR_MPSAFE and may be called without the BGL held.
207  */
208 static void
209 clkintr(void *dummy, void *frame_arg)
210 {
211         static sysclock_t sysclock_count;       /* NOTE! Must be static */
212         struct globaldata *gd = mycpu;
213         struct globaldata *gscan;
214         int n;
215
216         /*
217          * SWSTROBE mode is a one-shot, the timer is no longer running
218          */
219         timer0_running = 0;
220
221         /*
222          * XXX the dispatcher needs work.  right now we call systimer_intr()
223          * directly or via IPI for any cpu with systimers queued, which is
224          * usually *ALL* of them.  We need to use the LAPIC timer for this.
225          */
226         sysclock_count = sys_cputimer->count();
227         for (n = 0; n < ncpus; ++n) {
228             gscan = globaldata_find(n);
229             if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
230                 continue;
231             if (gscan != gd) {
232                 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, 
233                                 &sysclock_count, 1);
234             } else {
235                 systimer_intr(&sysclock_count, 0, frame_arg);
236             }
237         }
238 }
239
240
241 /*
242  * NOTE! not MP safe.
243  */
244 int
245 acquire_timer2(int mode)
246 {
247         if (timer2_state != RELEASED)
248                 return (-1);
249         timer2_state = ACQUIRED;
250
251         /*
252          * This access to the timer registers is as atomic as possible
253          * because it is a single instruction.  We could do better if we
254          * knew the rate.
255          */
256         outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
257         return (0);
258 }
259
260 int
261 release_timer2(void)
262 {
263         if (timer2_state != ACQUIRED)
264                 return (-1);
265         outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
266         timer2_state = RELEASED;
267         return (0);
268 }
269
270 #include "opt_ddb.h"
271 #ifdef DDB
272 #include <ddb/ddb.h>
273
274 DB_SHOW_COMMAND(rtc, rtc)
275 {
276         kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
277                rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
278                rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
279                rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
280 }
281 #endif /* DDB */
282
283 /*
284  * Return the current cpu timer count as a 32 bit integer.
285  */
286 static
287 sysclock_t
288 i8254_cputimer_count(void)
289 {
290         static uint16_t cputimer_last;
291         uint16_t count;
292         sysclock_t ret;
293
294         clock_lock();
295         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
296         count = (uint8_t)inb(i8254_walltimer_cntr);             /* get countdown */
297         count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
298         count = -count;                                 /* -> countup */
299         if (count < cputimer_last)                      /* rollover */
300                 i8254_cputimer.base += 0x00010000;
301         ret = i8254_cputimer.base | count;
302         cputimer_last = count;
303         clock_unlock();
304         return(ret);
305 }
306
307 /*
308  * This function is called whenever the system timebase changes, allowing
309  * us to calculate what is needed to convert a system timebase tick 
310  * into an 8254 tick for the interrupt timer.  If we can convert to a
311  * simple shift, multiplication, or division, we do so.  Otherwise 64
312  * bit arithmatic is required every time the interrupt timer is reloaded.
313  */
314 static void
315 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
316 {
317     int freq;
318     int div;
319
320     /*
321      * Will a simple divide do the trick?
322      */
323     div = (timer->freq + (cti->freq / 2)) / cti->freq;
324     freq = cti->freq * div;
325
326     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
327         i8254_cputimer_div = div;
328     else
329         i8254_cputimer_div = 0;
330 }
331
332 /*
333  * Reload for the next timeout.  It is possible for the reload value
334  * to be 0 or negative, indicating that an immediate timer interrupt
335  * is desired.  For now make the minimum 2 ticks.
336  *
337  * We may have to convert from the system timebase to the 8254 timebase.
338  */
339 static void
340 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
341 {
342     uint16_t count;
343
344     if (i8254_cputimer_div)
345         reload /= i8254_cputimer_div;
346     else
347         reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
348
349     if ((int)reload < 2)
350         reload = 2;
351
352     clock_lock();
353     if (timer0_running) {
354         outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);     /* count-down timer */
355         count = (uint8_t)inb(TIMER_CNTR0);              /* lsb */
356         count |= ((uint8_t)inb(TIMER_CNTR0) << 8);      /* msb */
357         if (reload < count) {
358             outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
359             outb(TIMER_CNTR0, (uint8_t)reload);         /* lsb */
360             outb(TIMER_CNTR0, (uint8_t)(reload >> 8));  /* msb */
361         }
362     } else {
363         timer0_running = 1;
364         if (reload > 0xFFFF)
365             reload = 0;         /* full count */
366         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
367         outb(TIMER_CNTR0, (uint8_t)reload);             /* lsb */
368         outb(TIMER_CNTR0, (uint8_t)(reload >> 8));      /* msb */
369     }
370     clock_unlock();
371 }
372
373 /*
374  * DELAY(usec)       - Spin for the specified number of microseconds.
375  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
376  *                     but do a thread switch in the loop
377  *
378  * Relies on timer 1 counting down from (cputimer_freq / hz)
379  * Note: timer had better have been programmed before this is first used!
380  */
381 static void
382 DODELAY(int n, int doswitch)
383 {
384         ssysclock_t delta, ticks_left;
385         sysclock_t prev_tick, tick;
386
387 #ifdef DELAYDEBUG
388         int getit_calls = 1;
389         int n1;
390         static int state = 0;
391
392         if (state == 0) {
393                 state = 1;
394                 for (n1 = 1; n1 <= 10000000; n1 *= 10)
395                         DELAY(n1);
396                 state = 2;
397         }
398         if (state == 1)
399                 kprintf("DELAY(%d)...", n);
400 #endif
401         /*
402          * Guard against the timer being uninitialized if we are called
403          * early for console i/o.
404          */
405         if (timer0_state == RELEASED)
406                 i8254_restore();
407
408         /*
409          * Read the counter first, so that the rest of the setup overhead is
410          * counted.  Then calculate the number of hardware timer ticks
411          * required, rounding up to be sure we delay at least the requested
412          * number of microseconds.
413          */
414         prev_tick = sys_cputimer->count();
415         ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
416                      1000000;
417
418         /*
419          * Loop until done.
420          */
421         while (ticks_left > 0) {
422                 tick = sys_cputimer->count();
423 #ifdef DELAYDEBUG
424                 ++getit_calls;
425 #endif
426                 delta = tick - prev_tick;
427                 prev_tick = tick;
428                 if (delta < 0)
429                         delta = 0;
430                 ticks_left -= delta;
431                 if (doswitch && ticks_left > 0)
432                         lwkt_switch();
433                 cpu_pause();
434         }
435 #ifdef DELAYDEBUG
436         if (state == 1)
437                 kprintf(" %d calls to getit() at %d usec each\n",
438                        getit_calls, (n + 5) / getit_calls);
439 #endif
440 }
441
/*
 * Spin for 'n' microseconds.  DELAY() never switches threads.
 */
void
DELAY(int n)
{
	const int never_switch = 0;

	DODELAY(n, never_switch);
}
450
451 /*
452  * Returns non-zero if the specified time period has elapsed.  Call
453  * first with last_clock set to 0.
454  */
455 int
456 CHECKTIMEOUT(TOTALDELAY *tdd)
457 {
458         sysclock_t delta;
459         int us;
460
461         if (tdd->started == 0) {
462                 if (timer0_state == RELEASED)
463                         i8254_restore();
464                 tdd->last_clock = sys_cputimer->count();
465                 tdd->started = 1;
466                 return(0);
467         }
468         delta = sys_cputimer->count() - tdd->last_clock;
469         us = (u_int64_t)delta * (u_int64_t)1000000 /
470              (u_int64_t)sys_cputimer->freq;
471         tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
472                            1000000;
473         tdd->us -= us;
474         return (tdd->us < 0);
475 }
476
477
478 /*
479  * DRIVERSLEEP() does not switch if called with a spinlock held or
480  * from a hard interrupt.
481  */
482 void
483 DRIVERSLEEP(int usec)
484 {
485         globaldata_t gd = mycpu;
486
487         if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
488                 DODELAY(usec, 0);
489         } else {
490                 DODELAY(usec, 1);
491         }
492 }
493
494 static void
495 sysbeepstop(void *chan)
496 {
497         outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */
498         beeping = 0;
499         release_timer2();
500 }
501
502 int
503 sysbeep(int pitch, int period)
504 {
505         if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
506                 return(-1);
507         if (sysbeep_enable == 0)
508                 return(-1);
509         /*
510          * Nobody else is using timer2, we do not need the clock lock
511          */
512         outb(TIMER_CNTR2, pitch);
513         outb(TIMER_CNTR2, (pitch>>8));
514         if (!beeping) {
515                 /* enable counter2 output to speaker */
516                 outb(IO_PPI, inb(IO_PPI) | 3);
517                 beeping = period;
518                 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
519         }
520         return (0);
521 }
522
523 /*
524  * RTC support routines
525  */
526
527 int
528 rtcin(int reg)
529 {
530         u_char val;
531
532         crit_enter();
533         outb(IO_RTC, reg);
534         inb(0x84);
535         val = inb(IO_RTC + 1);
536         inb(0x84);
537         crit_exit();
538         return (val);
539 }
540
541 static __inline void
542 writertc(u_char reg, u_char val)
543 {
544         crit_enter();
545         inb(0x84);
546         outb(IO_RTC, reg);
547         inb(0x84);
548         outb(IO_RTC + 1, val);
549         inb(0x84);              /* XXX work around wrong order in rtcin() */
550         crit_exit();
551 }
552
/*
 * Read RTC register 'port' and return its value converted with bcd2bin().
 */
static __inline int
readrtc(int port)
{
	return (bcd2bin(rtcin(port)));
}
558
559 static u_int
560 calibrate_clocks(void)
561 {
562         tsc_uclock_t old_tsc;
563         u_int tot_count;
564         sysclock_t count, prev_count;
565         int sec, start_sec, timeout;
566
567         if (bootverbose)
568                 kprintf("Calibrating clock(s) ...\n");
569         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
570                 goto fail;
571         timeout = 100000000;
572
573         /* Read the mc146818A seconds counter. */
574         for (;;) {
575                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
576                         sec = rtcin(RTC_SEC);
577                         break;
578                 }
579                 if (--timeout == 0)
580                         goto fail;
581         }
582
583         /* Wait for the mC146818A seconds counter to change. */
584         start_sec = sec;
585         for (;;) {
586                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
587                         sec = rtcin(RTC_SEC);
588                         if (sec != start_sec)
589                                 break;
590                 }
591                 if (--timeout == 0)
592                         goto fail;
593         }
594
595         /* Start keeping track of the i8254 counter. */
596         prev_count = sys_cputimer->count();
597         tot_count = 0;
598
599         if (tsc_present) 
600                 old_tsc = rdtsc();
601         else
602                 old_tsc = 0;            /* shut up gcc */
603
604         /*
605          * Wait for the mc146818A seconds counter to change.  Read the i8254
606          * counter for each iteration since this is convenient and only
607          * costs a few usec of inaccuracy. The timing of the final reads
608          * of the counters almost matches the timing of the initial reads,
609          * so the main cause of inaccuracy is the varying latency from 
610          * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
611          * rtcin(RTC_SEC) that returns a changed seconds count.  The
612          * maximum inaccuracy from this cause is < 10 usec on 486's.
613          */
614         start_sec = sec;
615         for (;;) {
616                 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
617                         sec = rtcin(RTC_SEC);
618                 count = sys_cputimer->count();
619                 tot_count += (int)(count - prev_count);
620                 prev_count = count;
621                 if (sec != start_sec)
622                         break;
623                 if (--timeout == 0)
624                         goto fail;
625         }
626
627         /*
628          * Read the cpu cycle counter.  The timing considerations are
629          * similar to those for the i8254 clock.
630          */
631         if (tsc_present) {
632                 tsc_frequency = rdtsc() - old_tsc;
633                 if (bootverbose) {
634                         kprintf("TSC clock: %jd Hz (Method A)\n",
635                             (intmax_t)tsc_frequency);
636                 }
637         }
638         tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
639
640         kprintf("i8254 clock: %u Hz\n", tot_count);
641         return (tot_count);
642
643 fail:
644         kprintf("failed, using default i8254 clock of %u Hz\n",
645                 i8254_cputimer.freq);
646         return (i8254_cputimer.freq);
647 }
648
649 static void
650 i8254_restore(void)
651 {
652         timer0_state = ACQUIRED;
653
654         clock_lock();
655
656         /*
657          * Timer0 is our fine-grained variable clock interrupt
658          */
659         outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
660         outb(TIMER_CNTR0, 2);   /* lsb */
661         outb(TIMER_CNTR0, 0);   /* msb */
662         clock_unlock();
663
664         if (!i8254_nointr) {
665                 cputimer_intr_register(&i8254_cputimer_intr);
666                 cputimer_intr_select(&i8254_cputimer_intr, 0);
667         }
668
669         /*
670          * Timer1 or timer2 is our free-running clock, but only if another
671          * has not been selected.
672          */
673         cputimer_register(&i8254_cputimer);
674         cputimer_select(&i8254_cputimer, 0);
675 }
676
677 static void
678 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
679 {
680         int which;
681
682         /*
683          * Should we use timer 1 or timer 2 ?
684          */
685         which = 0;
686         TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
687         if (which != 1 && which != 2)
688                 which = 2;
689
690         switch(which) {
691         case 1:
692                 timer->name = "i8254_timer1";
693                 timer->type = CPUTIMER_8254_SEL1;
694                 i8254_walltimer_sel = TIMER_SEL1;
695                 i8254_walltimer_cntr = TIMER_CNTR1;
696                 timer1_state = ACQUIRED;
697                 break;
698         case 2:
699                 timer->name = "i8254_timer2";
700                 timer->type = CPUTIMER_8254_SEL2;
701                 i8254_walltimer_sel = TIMER_SEL2;
702                 i8254_walltimer_cntr = TIMER_CNTR2;
703                 timer2_state = ACQUIRED;
704                 break;
705         }
706
707         timer->base = (oldclock + 0xFFFF) & ~0xFFFF;
708
709         clock_lock();
710         outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
711         outb(i8254_walltimer_cntr, 0);  /* lsb */
712         outb(i8254_walltimer_cntr, 0);  /* msb */
713         outb(IO_PPI, inb(IO_PPI) | 1);  /* bit 0: enable gate, bit 1: spkr */
714         clock_unlock();
715 }
716
717 static void
718 i8254_cputimer_destruct(struct cputimer *timer)
719 {
720         switch(timer->type) {
721         case CPUTIMER_8254_SEL1:
722             timer1_state = RELEASED;
723             break;
724         case CPUTIMER_8254_SEL2:
725             timer2_state = RELEASED;
726             break;
727         default:
728             break;
729         }
730         timer->type = 0;
731 }
732
733 static void
734 rtc_restore(void)
735 {
736         /* Restore all of the RTC's "status" (actually, control) registers. */
737         writertc(RTC_STATUSB, RTCSB_24HR);
738         writertc(RTC_STATUSA, rtc_statusa);
739         writertc(RTC_STATUSB, rtc_statusb);
740 }
741
/*
 * Restore all the timers.
 *
 * This function is called to resynchronize our core timekeeping after a
 * long halt, e.g. from apm_default_resume() and friends.  It is also
 * called if after a BIOS call we have detected munging of the 8254.
 * It is necessary because cputimer_count() counter's delta may have grown
 * too large for nanouptime() and friends to handle, or (in the case of 8254
 * munging) might cause the SYSTIMER code to prematurely trigger.
 *
 * Both restores run inside a single critical section.
 */
void
timer_restore(void)
{
	crit_enter();
	i8254_restore();	/* reprogram the 8254, re-register timers */
	rtc_restore();		/* rewrite the RTC control registers */
	crit_exit();
}
760
761 /*
762  * Initialize 8254 timer 0 early so that it can be used in DELAY().
763  */
764 void
765 startrtclock(void)
766 {
767         u_int delta, freq;
768
769         /* 
770          * Can we use the TSC?
771          *
772          * NOTE: If running under qemu, probably a good idea to force the
773          *       TSC because we are not likely to detect it as being
774          *       invariant or mpsyncd if you don't.  This will greatly
775          *       reduce SMP contention.
776          */
777         if (cpu_feature & CPUID_TSC) {
778                 tsc_present = 1;
779                 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
780
781                 if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
782                      cpu_vendor_id == CPU_VENDOR_AMD) &&
783                     cpu_exthigh >= 0x80000007) {
784                         u_int regs[4];
785
786                         do_cpuid(0x80000007, regs);
787                         if (regs[3] & 0x100)
788                                 tsc_invariant = 1;
789                 }
790         } else {
791                 tsc_present = 0;
792         }
793
794         /*
795          * Initial RTC state, don't do anything unexpected
796          */
797         writertc(RTC_STATUSA, rtc_statusa);
798         writertc(RTC_STATUSB, RTCSB_24HR);
799
800         /*
801          * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 
802          * generate an interrupt, which we will ignore for now.
803          *
804          * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
805          * (so it counts a full 2^16 and repeats).  We will use this timer
806          * for our counting.
807          */
808         i8254_restore();
809
810         /*
811          * When booting without verbose messages, it's pointless to run the
812          * calibrate_clocks() calibration code, when we don't use the
813          * results in any way. With bootverbose, we are at least printing
814          *  this information to the kernel log.
815          */
816         if (calibrate_timers_with_rtc == 0 && !bootverbose)
817                 goto skip_rtc_based;
818
819         freq = calibrate_clocks();
820 #ifdef CLK_CALIBRATION_LOOP
821         if (bootverbose) {
822                 int c;
823
824                 cnpoll(TRUE);
825                 kprintf("Press a key on the console to "
826                         "abort clock calibration\n");
827                 while ((c = cncheckc()) == -1 || c == NOKEY)
828                         calibrate_clocks();
829                 cnpoll(FALSE);
830         }
831 #endif
832
833         /*
834          * Use the calibrated i8254 frequency if it seems reasonable.
835          * Otherwise use the default, and don't use the calibrated i586
836          * frequency.
837          */
838         delta = freq > i8254_cputimer.freq ? 
839                         freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
840         if (delta < i8254_cputimer.freq / 100) {
841                 if (calibrate_timers_with_rtc == 0) {
842                         kprintf(
843 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
844                         freq = i8254_cputimer.freq;
845                 }
846                 /*
847                  * NOTE:
848                  * Interrupt timer's freq must be adjusted
849                  * before we change the cuptimer's frequency.
850                  */
851                 i8254_cputimer_intr.freq = freq;
852                 cputimer_set_frequency(&i8254_cputimer, freq);
853         } else {
854                 if (bootverbose)
855                         kprintf("%d Hz differs from default of %d Hz "
856                                 "by more than 1%%\n",
857                                 freq, i8254_cputimer.freq);
858                 tsc_frequency = 0;
859         }
860
861         if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
862                 kprintf("hw.calibrate_timers_with_rtc not "
863                         "set - using old calibration method\n");
864                 tsc_frequency = 0;
865         }
866
867 skip_rtc_based:
868         if (tsc_present && tsc_frequency == 0) {
869                 /*
870                  * Calibration of the i586 clock relative to the mc146818A
871                  * clock failed.  Do a less accurate calibration relative
872                  * to the i8254 clock.
873                  */
874                 u_int64_t old_tsc = rdtsc();
875
876                 DELAY(1000000);
877                 tsc_frequency = rdtsc() - old_tsc;
878                 if (bootverbose && calibrate_timers_with_rtc) {
879                         kprintf("TSC clock: %jd Hz (Method B)\n",
880                             (intmax_t)tsc_frequency);
881                 }
882         }
883
884         if (tsc_present) {
885                 kprintf("TSC%s clock: %jd Hz\n",
886                     tsc_invariant ? " invariant" : "",
887                     (intmax_t)tsc_frequency);
888         }
889         tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
890
891         EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
892                               NULL, SHUTDOWN_PRI_LAST);
893 }
894
895 /*
896  * Sync the time of day back to the RTC on shutdown, but only if
897  * we have already loaded it and have not crashed.
898  */
899 static void
900 resettodr_on_shutdown(void *arg __unused)
901 {
902         if (rtc_loaded && panicstr == NULL) {
903                 resettodr();
904         }
905 }
906
907 /*
908  * Initialize the time of day register, based on the time base which is, e.g.
909  * from a filesystem.
910  */
911 void
912 inittodr(time_t base)
913 {
914         unsigned long   sec, days;
915         int             year, month;
916         int             y, m;
917         struct timespec ts;
918
919         if (base) {
920                 ts.tv_sec = base;
921                 ts.tv_nsec = 0;
922                 set_timeofday(&ts);
923         }
924
925         /* Look if we have a RTC present and the time is valid */
926         if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
927                 goto wrong_time;
928
929         /* wait for time update to complete */
930         /* If RTCSA_TUP is zero, we have at least 244us before next update */
931         crit_enter();
932         while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
933                 crit_exit();
934                 crit_enter();
935         }
936
937         days = 0;
938 #ifdef USE_RTC_CENTURY
939         year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
940 #else
941         year = readrtc(RTC_YEAR) + 1900;
942         if (year < 1970)
943                 year += 100;
944 #endif
945         if (year < 1970) {
946                 crit_exit();
947                 goto wrong_time;
948         }
949         month = readrtc(RTC_MONTH);
950         for (m = 1; m < month; m++)
951                 days += daysinmonth[m-1];
952         if ((month > 2) && LEAPYEAR(year))
953                 days ++;
954         days += readrtc(RTC_DAY) - 1;
955         for (y = 1970; y < year; y++)
956                 days += DAYSPERYEAR + LEAPYEAR(y);
957         sec = ((( days * 24 +
958                   readrtc(RTC_HRS)) * 60 +
959                   readrtc(RTC_MIN)) * 60 +
960                   readrtc(RTC_SEC));
961         /* sec now contains the number of seconds, since Jan 1 1970,
962            in the local time zone */
963
964         sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
965
966         y = (int)(time_second - sec);
967         if (y <= -2 || y >= 2) {
968                 /* badly off, adjust it */
969                 ts.tv_sec = sec;
970                 ts.tv_nsec = 0;
971                 set_timeofday(&ts);
972         }
973         rtc_loaded = 1;
974         crit_exit();
975         return;
976
977 wrong_time:
978         kprintf("Invalid time in real time clock.\n");
979         kprintf("Check and reset the date immediately!\n");
980 }
981
982 /*
983  * Write system time back to RTC
984  */
985 void
986 resettodr(void)
987 {
988         struct timeval tv;
989         unsigned long tm;
990         int m;
991         int y;
992
993         if (disable_rtc_set)
994                 return;
995
996         microtime(&tv);
997         tm = tv.tv_sec;
998
999         crit_enter();
1000         /* Disable RTC updates and interrupts. */
1001         writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
1002
1003         /* Calculate local time to put in RTC */
1004
1005         tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1006
1007         writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;    /* Write back Seconds */
1008         writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;    /* Write back Minutes */
1009         writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;    /* Write back Hours   */
1010
1011         /* We have now the days since 01-01-1970 in tm */
1012         writertc(RTC_WDAY, (tm+4)%7);                   /* Write back Weekday */
1013         for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1014              tm >= m;
1015              y++,      m = DAYSPERYEAR + LEAPYEAR(y))
1016              tm -= m;
1017
1018         /* Now we have the years in y and the day-of-the-year in tm */
1019         writertc(RTC_YEAR, bin2bcd(y%100));             /* Write back Year    */
1020 #ifdef USE_RTC_CENTURY
1021         writertc(RTC_CENTURY, bin2bcd(y/100));          /* ... and Century    */
1022 #endif
1023         for (m = 0; ; m++) {
1024                 int ml;
1025
1026                 ml = daysinmonth[m];
1027                 if (m == 1 && LEAPYEAR(y))
1028                         ml++;
1029                 if (tm < ml)
1030                         break;
1031                 tm -= ml;
1032         }
1033
1034         writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1035         writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1036
1037         /* Reenable RTC updates and interrupts. */
1038         writertc(RTC_STATUSB, rtc_statusb);
1039         crit_exit();
1040 }
1041
1042 static int
1043 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1044 {
1045         sysclock_t base;
1046         long lastcnt;
1047
1048         /*
1049          * Following code assumes the 8254 is the cpu timer,
1050          * so make sure it is.
1051          */
1052         KKASSERT(sys_cputimer == &i8254_cputimer);
1053         KKASSERT(cti == &i8254_cputimer_intr);
1054
1055         lastcnt = get_interrupt_counter(irq, mycpuid);
1056
1057         /*
1058          * Force an 8254 Timer0 interrupt and wait 1/100s for
1059          * it to happen, then see if we got it.
1060          */
1061         kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1062
1063         i8254_intr_reload(cti, 2);
1064         base = sys_cputimer->count();
1065         while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1066                 ; /* nothing */
1067
1068         if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1069                 return ENOENT;
1070         return 0;
1071 }
1072
1073 /*
1074  * Start both clocks running.  DragonFly note: the stat clock is no longer
1075  * used.  Instead, 8254 based systimers are used for all major clock
1076  * interrupts.
1077  */
1078 static void
1079 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1080 {
1081         void *clkdesc = NULL;
1082         int irq = 0, mixed_mode = 0, error;
1083
1084         KKASSERT(mycpuid == 0);
1085         callout_init_mp(&sysbeepstop_ch);
1086
1087         if (!selected && i8254_intr_disable)
1088                 goto nointr;
1089
1090         /*
1091          * The stat interrupt mask is different without the
1092          * statistics clock.  Also, don't set the interrupt
1093          * flag which would normally cause the RTC to generate
1094          * interrupts.
1095          */
1096         rtc_statusb = RTCSB_24HR;
1097
1098         /* Finish initializing 8254 timer 0. */
1099         if (ioapic_enable) {
1100                 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1101                         INTR_POLARITY_HIGH);
1102                 if (irq < 0) {
1103 mixed_mode_setup:
1104                         error = ioapic_conf_legacy_extint(0);
1105                         if (!error) {
1106                                 irq = machintr_legacy_intr_find(0,
1107                                     INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1108                                 if (irq < 0)
1109                                         error = ENOENT;
1110                         }
1111
1112                         if (error) {
1113                                 if (!selected) {
1114                                         kprintf("IOAPIC: setup mixed mode for "
1115                                                 "irq 0 failed: %d\n", error);
1116                                         goto nointr;
1117                                 } else {
1118                                         panic("IOAPIC: setup mixed mode for "
1119                                               "irq 0 failed: %d\n", error);
1120                                 }
1121                         }
1122                         mixed_mode = 1;
1123                 }
1124                 clkdesc = register_int(irq, clkintr, NULL, "clk",
1125                                        NULL,
1126                                        INTR_EXCL | INTR_CLOCK |
1127                                        INTR_NOPOLL | INTR_MPSAFE |
1128                                        INTR_NOENTROPY, 0);
1129         } else {
1130                 register_int(0, clkintr, NULL, "clk", NULL,
1131                              INTR_EXCL | INTR_CLOCK |
1132                              INTR_NOPOLL | INTR_MPSAFE |
1133                              INTR_NOENTROPY, 0);
1134         }
1135
1136         /* Initialize RTC. */
1137         writertc(RTC_STATUSA, rtc_statusa);
1138         writertc(RTC_STATUSB, RTCSB_24HR);
1139
1140         if (ioapic_enable) {
1141                 error = i8254_ioapic_trial(irq, cti);
1142                 if (error) {
1143                         if (mixed_mode) {
1144                                 if (!selected) {
1145                                         kprintf("IOAPIC: mixed mode for irq %d "
1146                                                 "trial failed: %d\n",
1147                                                 irq, error);
1148                                         goto nointr;
1149                                 } else {
1150                                         panic("IOAPIC: mixed mode for irq %d "
1151                                               "trial failed: %d\n", irq, error);
1152                                 }
1153                         } else {
1154                                 kprintf("IOAPIC: warning 8254 is not connected "
1155                                         "to the correct pin, try mixed mode\n");
1156                                 unregister_int(clkdesc, 0);
1157                                 goto mixed_mode_setup;
1158                         }
1159                 }
1160         }
1161         return;
1162
1163 nointr:
1164         i8254_nointr = 1; /* don't try to register again */
1165         cputimer_intr_deregister(cti);
1166 }
1167
1168 void
1169 setstatclockrate(int newhz)
1170 {
1171         if (newhz == RTC_PROFRATE)
1172                 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1173         else
1174                 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1175         writertc(RTC_STATUSA, rtc_statusa);
1176 }
1177
#if 0
/*
 * Compiled out.  Timecounter read routine returning the TSC value
 * truncated to `unsigned'; `tc' is not used.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	unsigned now;

	now = rdtsc();
	return (now);
}
#endif
1185
#ifdef KERN_TIMESTAMP
/*
 * Debug timestamp ring, exported read-only as debug.timestamp.
 * Entries are stored as pairs: (low 32 bits of the TSC, caller tag).
 */
#define KERN_TIMESTAMP_SIZE 16384
static u_long tsc[KERN_TIMESTAMP_SIZE];
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");

/*
 * Append one (timestamp, x) pair to the ring, wrapping to the start when
 * the end is reached, and zero the slot that follows so the most recent
 * entry can be located.
 */
void
_TSTMP(u_int32_t x)
{
	static int slot;

	tsc[slot] = (u_int32_t)rdtsc();
	tsc[slot + 1] = x;
	slot += 2;
	if (slot >= KERN_TIMESTAMP_SIZE)
		slot = 0;
	tsc[slot] = 0;		/* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1204
1205 /*
1206  *
1207  */
1208
1209 static int
1210 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1211 {
1212     sysclock_t count;
1213     uint64_t tscval;
1214     char buf[32];
1215
1216     crit_enter();
1217     if (sys_cputimer == &i8254_cputimer)
1218         count = sys_cputimer->count();
1219     else
1220         count = 0;
1221     if (tsc_present)
1222         tscval = rdtsc();
1223     else
1224         tscval = 0;
1225     crit_exit();
1226     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1227     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1228 }
1229
/*
 * State shared between the cpu running the TSC synchronization test and
 * the remote cpus it interrupts.
 */
struct tsc_mpsync_arg {
	/* TSC value sampled by the testing cpu; remotes compare against it */
	volatile uint64_t	tsc_target;
	/* set to 1 by the tester, cleared by a remote that reads a lower TSC */
	volatile int		tsc_mpsync;
};
1234
/*
 * Counters used to collect the results of the per-cpu TSC
 * synchronization test threads.
 */
struct tsc_mpsync_thr {
	/* number of cpus that have finished their test */
	volatile int		tsc_done_cnt;
	/* number of cpus whose test reported a synchronized TSC */
	volatile int		tsc_mpsync_cnt;
};
1239
1240 static void
1241 tsc_mpsync_test_remote(void *xarg)
1242 {
1243         struct tsc_mpsync_arg *arg = xarg;
1244         uint64_t tsc;
1245
1246         tsc = rdtsc_ordered();
1247         if (tsc < arg->tsc_target)
1248                 arg->tsc_mpsync = 0;
1249 }
1250
1251 static void
1252 tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
1253 {
1254         struct globaldata *gd = mycpu;
1255         tsc_uclock_t test_end, test_begin;
1256         u_int i;
1257
1258         if (bootverbose) {
1259                 kprintf("cpu%d: TSC testing MP synchronization ...\n",
1260                     gd->gd_cpuid);
1261         }
1262
1263         test_begin = rdtsc_ordered();
1264         /* Run test for 100ms */
1265         test_end = test_begin + (tsc_frequency / 10);
1266
1267         arg->tsc_mpsync = 1;
1268         arg->tsc_target = test_begin;
1269
1270 #define TSC_TEST_TRYMAX         1000000 /* Make sure we could stop */
1271 #define TSC_TEST_TRYMIN         50000
1272
1273         for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
1274                 struct lwkt_cpusync cs;
1275
1276                 crit_enter();
1277                 lwkt_cpusync_init(&cs, gd->gd_other_cpus,
1278                     tsc_mpsync_test_remote, arg);
1279                 lwkt_cpusync_interlock(&cs);
1280                 arg->tsc_target = rdtsc_ordered();
1281                 cpu_mfence();
1282                 lwkt_cpusync_deinterlock(&cs);
1283                 crit_exit();
1284
1285                 if (!arg->tsc_mpsync) {
1286                         kprintf("cpu%d: TSC is not MP synchronized @%u\n",
1287                             gd->gd_cpuid, i);
1288                         break;
1289                 }
1290                 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
1291                         break;
1292         }
1293
1294 #undef TSC_TEST_TRYMIN
1295 #undef TSC_TEST_TRYMAX
1296
1297         if (arg->tsc_target == test_begin) {
1298                 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
1299                 /* XXX disable TSC? */
1300                 tsc_invariant = 0;
1301                 arg->tsc_mpsync = 0;
1302                 return;
1303         }
1304
1305         if (arg->tsc_mpsync && bootverbose) {
1306                 kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
1307                     gd->gd_cpuid, i);
1308         }
1309 }
1310
1311 static void
1312 tsc_mpsync_ap_thread(void *xthr)
1313 {
1314         struct tsc_mpsync_thr *thr = xthr;
1315         struct tsc_mpsync_arg arg;
1316
1317         tsc_mpsync_test_loop(&arg);
1318         if (arg.tsc_mpsync) {
1319                 atomic_add_int(&thr->tsc_mpsync_cnt, 1);
1320                 cpu_sfence();
1321         }
1322         atomic_add_int(&thr->tsc_done_cnt, 1);
1323
1324         lwkt_exit();
1325 }
1326
1327 static void
1328 tsc_mpsync_test(void)
1329 {
1330         struct tsc_mpsync_arg arg;
1331
1332         if (!tsc_invariant) {
1333                 /* Not even invariant TSC */
1334                 return;
1335         }
1336
1337         if (ncpus == 1) {
1338                 /* Only one CPU */
1339                 tsc_mpsync = 1;
1340                 return;
1341         }
1342
1343         /*
1344          * Forcing can be used w/qemu to reduce contention
1345          */
1346         TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1347
1348         if (tsc_mpsync == 0) {
1349                 switch(cpu_vendor_id) {
1350                 case CPU_VENDOR_INTEL:
1351                         /*
1352                          * Intel probably works
1353                          */
1354                         break;
1355                 case CPU_VENDOR_AMD:
1356                         /*
1357                          * AMD < Ryzen probably doesn't work
1358                          */
1359                         if (CPUID_TO_FAMILY(cpu_id) < 0x17)
1360                                 return;
1361                         break;
1362                 default:
1363                         /* probably won't work */
1364                         return;
1365                 }
1366         }
1367
1368         /*
1369          * Test even if forced above.  If forced, we will use the TSC
1370          * even if the test fails.
1371          */
1372         kprintf("TSC testing MP synchronization ...\n");
1373
1374         tsc_mpsync_test_loop(&arg);
1375         if (arg.tsc_mpsync) {
1376                 struct tsc_mpsync_thr thr;
1377                 int cpu;
1378
1379                 /*
1380                  * Test TSC MP synchronization on APs.
1381                  */
1382
1383                 thr.tsc_done_cnt = 1;
1384                 thr.tsc_mpsync_cnt = 1;
1385
1386                 for (cpu = 0; cpu < ncpus; ++cpu) {
1387                         if (cpu == mycpuid)
1388                                 continue;
1389
1390                         lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
1391                             NULL, 0, cpu, "tsc mpsync %d", cpu);
1392                 }
1393
1394                 while (thr.tsc_done_cnt != ncpus) {
1395                         cpu_pause();
1396                         cpu_lfence();
1397                 }
1398                 if (thr.tsc_mpsync_cnt == ncpus)
1399                         tsc_mpsync = 1;
1400         }
1401
1402         if (tsc_mpsync)
1403                 kprintf("TSC is MP synchronized\n");
1404         else
1405                 kprintf("TSC is not MP synchronized\n");
1406 }
1407 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1408
/*
 * Upper bound on the frequency of the TSC based cputimer.  The TSC
 * frequency is halved until it no longer exceeds this value.
 */
#define TSC_CPUTIMER_FREQMAX	128000000	/* 128Mhz */

/* Right shift applied to the raw TSC to obtain the cputimer count. */
static int tsc_cputimer_shift;
1412
1413 static void
1414 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1415 {
1416         timer->base = 0;
1417         timer->base = oldclock - timer->count();
1418 }
1419
1420 static __inline sysclock_t
1421 tsc_cputimer_count(void)
1422 {
1423         uint64_t tsc;
1424
1425         tsc = rdtsc();
1426         tsc >>= tsc_cputimer_shift;
1427
1428         return (tsc + tsc_cputimer.base);
1429 }
1430
1431 static sysclock_t
1432 tsc_cputimer_count_lfence(void)
1433 {
1434         cpu_lfence();
1435         return tsc_cputimer_count();
1436 }
1437
1438 static sysclock_t
1439 tsc_cputimer_count_mfence(void)
1440 {
1441         cpu_mfence();
1442         return tsc_cputimer_count();
1443 }
1444
1445 static uint64_t
1446 tsc_cpucounter_count_lfence(void)
1447 {
1448
1449         cpu_lfence();
1450         return (rdtsc());
1451 }
1452
1453 static uint64_t
1454 tsc_cpucounter_count_mfence(void)
1455 {
1456
1457         cpu_mfence();
1458         return (rdtsc());
1459 }
1460
1461 static void
1462 tsc_cputimer_register(void)
1463 {
1464         uint64_t freq;
1465         int enable = 1;
1466
1467         if (!tsc_mpsync) {
1468                 if (tsc_invariant) {
1469                         /* Per-cpu cpucounter still works. */
1470                         goto regcnt;
1471                 }
1472                 return;
1473         }
1474
1475         TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
1476         if (!enable)
1477                 return;
1478
1479         freq = tsc_frequency;
1480         while (freq > TSC_CPUTIMER_FREQMAX) {
1481                 freq >>= 1;
1482                 ++tsc_cputimer_shift;
1483         }
1484         kprintf("TSC: cputimer freq %ju, shift %d\n",
1485             (uintmax_t)freq, tsc_cputimer_shift);
1486
1487         tsc_cputimer.freq = freq;
1488
1489         if (cpu_vendor_id == CPU_VENDOR_INTEL)
1490                 tsc_cputimer.count = tsc_cputimer_count_lfence;
1491         else
1492                 tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
1493
1494         cputimer_register(&tsc_cputimer);
1495         cputimer_select(&tsc_cputimer, 0);
1496
1497         tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
1498 regcnt:
1499         tsc_cpucounter.freq = tsc_frequency;
1500         if (cpu_vendor_id == CPU_VENDOR_INTEL) {
1501                 tsc_cpucounter.count =
1502                     tsc_cpucounter_count_lfence;
1503         } else {
1504                 tsc_cpucounter.count =
1505                     tsc_cpucounter_count_mfence; /* safe bet */
1506         }
1507         cpucounter_register(&tsc_cpucounter);
1508 }
1509 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
1510         tsc_cputimer_register, NULL);
1511
1512 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
1513 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
1514             "frequency");
1515 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
1516             0, 0, hw_i8254_timestamp, "A", "");
1517
1518 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
1519             &tsc_present, 0, "TSC Available");
1520 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
1521             &tsc_invariant, 0, "Invariant TSC");
1522 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
1523             &tsc_mpsync, 0, "TSC is synchronized across CPUs");
1524 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
1525             &tsc_frequency, 0, "TSC Frequency");