/* DragonFly BSD: sys/platform/pc64/isa/clock.c */
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008-2021 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * William Jolitz and Don Ahn.
7  *
8  * This code is derived from software contributed to The DragonFly Project
9  * by Matthew Dillon <dillon@backplane.com>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *      from: @(#)clock.c       7.2 (Berkeley) 5/12/91
36  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
37  */
38
39 /*
40  * Routines to handle clock hardware.
41  */
42
43 /*
44  * inittodr, settodr and support routines written
45  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
46  *
47  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
48  */
49
50 #if 0
51 #include "opt_clock.h"
52 #endif
53
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/eventhandler.h>
57 #include <sys/time.h>
58 #include <sys/kernel.h>
59 #include <sys/bus.h>
60 #include <sys/sysctl.h>
61 #include <sys/cons.h>
62 #include <sys/kbio.h>
63 #include <sys/systimer.h>
64 #include <sys/globaldata.h>
65 #include <sys/machintr.h>
66 #include <sys/interrupt.h>
67
68 #include <sys/thread2.h>
69
70 #include <machine/clock.h>
71 #include <machine/cputypes.h>
72 #include <machine/frame.h>
73 #include <machine/ipl.h>
74 #include <machine/limits.h>
75 #include <machine/md_var.h>
76 #include <machine/psl.h>
77 #include <machine/segments.h>
78 #include <machine/smp.h>
79 #include <machine/specialreg.h>
80 #include <machine/intr_machdep.h>
81
82 #include <machine_base/apic/ioapic.h>
83 #include <machine_base/apic/ioapic_abi.h>
84 #include <machine_base/icu/icu.h>
85 #include <bus/isa/isa.h>
86 #include <bus/isa/rtc.h>
87 #include <machine_base/isa/timerreg.h>
88
89 SET_DECLARE(timecounter_init_set, const timecounter_init_t);
90 TIMECOUNTER_INIT(placeholder, NULL);
91
92 static void i8254_restore(void);
93 static void resettodr_on_shutdown(void *arg __unused);
94
95 /*
96  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
97  * can use a simple formula for leap years.
98  */
99 #define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
100 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
101
102 #ifndef TIMER_FREQ
103 #define TIMER_FREQ   1193182
104 #endif
105
static uint8_t i8254_walltimer_sel;     /* TIMER_SELn code of the wall timer */
static uint16_t i8254_walltimer_cntr;   /* I/O port of the wall timer counter */
static int timer0_running;              /* timer0 one-shot currently armed */

int     adjkerntz;              /* local offset from GMT in seconds */
int     disable_rtc_set;        /* disable resettodr() if != 0 */
int     tsc_present;
int     tsc_invariant;
int     tsc_mpsync;
int     wall_cmos_clock;        /* wall CMOS clock assumed if != 0 */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;  /* always at least 1, approx only */

/* Ownership state for each of the three i8254 counters. */
enum tstate { RELEASED, ACQUIRED };
static enum tstate timer0_state;
static enum tstate timer1_state;
static enum tstate timer2_state;

int     i8254_cputimer_disable; /* No need to initialize i8254 cputimer. */

static  int     beeping = 0;
static  const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static  u_char  rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static  u_char  rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int     rtc_loaded;

/* Simple divisor from sys timebase to 8254 ticks, 0 if none fits exactly. */
static  sysclock_t i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;
148
149 static sysclock_t i8254_cputimer_count(void);
150 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
151 static void i8254_cputimer_destruct(struct cputimer *cputimer);
152
/*
 * Free-running i8254 cputimer (timer1 or timer2, chosen in construct).
 */
static struct cputimer  i8254_cputimer = {
    .next               = SLIST_ENTRY_INITIALIZER,
    .name               = "i8254",
    .pri                = CPUTIMER_PRI_8254,
    .type               = 0,    /* determined later */
    .count              = i8254_cputimer_count,
    .fromhz             = cputimer_default_fromhz,
    .fromus             = cputimer_default_fromus,
    .construct          = i8254_cputimer_construct,
    .destruct           = i8254_cputimer_destruct,
    .freq               = TIMER_FREQ
};
165
166 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
167 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
168 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
169
/*
 * Interrupt cputimer backed by i8254 timer0 in one-shot (SWSTROBE) mode.
 */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
186
187 /*
188  * Use this to lwkt_switch() when the scheduler clock is not
189  * yet running, otherwise lwkt_switch() won't do anything.
190  * XXX needs cleaning up in lwkt_thread.c
191  */
static void
lwkt_force_switch(void)
{
	/*
	 * Tick the scheduler clock by hand inside a critical section so
	 * the subsequent lwkt_switch() sees a pending reschedule even
	 * though the real scheduler clock is not running yet.
	 */
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}
200
201 /*
202  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
203  * counting as of this interrupt.  We use timer1 in free-running mode (not
204  * generating any interrupts) as our main counter.  Each cpu has timeouts
205  * pending.
206  *
207  * This code is INTR_MPSAFE and may be called without the BGL held.
208  */
static void
clkintr(void *dummy, void *frame_arg)
{
	/*
	 * Must be static: the address is handed to other cpus via
	 * lwkt_send_ipiq3() below and read after this frame returns.
	 */
	static sysclock_t sysclock_count;       /* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* Skip cpus with no queued systimers. */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		/* Remote cpu: dispatch via IPI. */
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		/* Local cpu: call directly, passing the interrupt frame. */
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
240
241
242 /*
243  * NOTE! not MP safe.
244  */
245 int
246 acquire_timer2(int mode)
247 {
248         if (timer2_state != RELEASED)
249                 return (-1);
250         timer2_state = ACQUIRED;
251
252         /*
253          * This access to the timer registers is as atomic as possible
254          * because it is a single instruction.  We could do better if we
255          * knew the rate.
256          */
257         outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
258         return (0);
259 }
260
261 int
262 release_timer2(void)
263 {
264         if (timer2_state != ACQUIRED)
265                 return (-1);
266         outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
267         timer2_state = RELEASED;
268         return (0);
269 }
270
271 #include "opt_ddb.h"
272 #ifdef DDB
273 #include <ddb/ddb.h>
274
DB_SHOW_COMMAND(rtc, rtc)
{
	/*
	 * Dump the raw RTC date/time (still BCD-encoded) and the
	 * A/B/C status registers for debugger inspection.
	 */
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
282 #endif /* DDB */
283
284 /*
285  * Return the current cpu timer count as a 32 bit integer.
286  */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;  /* last countup value, rollover detect */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch the current count, then read lsb followed by msb. */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);     /* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;                                 /* -> countup */
	if (count < cputimer_last)                      /* rollover */
		i8254_cputimer.base += 0x00010000U;
	/* base supplies the high bits, count the low 16 bits. */
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}
308
309 /*
310  * This function is called whenever the system timebase changes, allowing
311  * us to calculate what is needed to convert a system timebase tick 
312  * into an 8254 tick for the interrupt timer.  If we can convert to a
313  * simple shift, multiplication, or division, we do so.  Otherwise 64
314  * bit arithmatic is required every time the interrupt timer is reloaded.
315  */
316 static void
317 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
318 {
319     sysclock_t freq;
320     sysclock_t div;
321
322     /*
323      * Will a simple divide do the trick?
324      */
325     div = (timer->freq + (cti->freq / 2)) / cti->freq;
326     freq = cti->freq * div;
327
328     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
329         i8254_cputimer_div = div;
330     else
331         i8254_cputimer_div = 0;
332 }
333
334 /*
335  * Reload for the next timeout.  It is possible for the reload value
336  * to be 0 or negative, indicating that an immediate timer interrupt
337  * is desired.  For now make the minimum 2 ticks.
338  *
339  * We may have to convert from the system timebase to the 8254 timebase.
340  */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* A negative (wrapped) reload means "fire immediately". */
    if ((ssysclock_t)reload < 0)
	    reload = 1;
    /* Convert from system timebase ticks to 8254 ticks. */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

    if (reload < 2)
	reload = 2;             /* minimum count */
    if (reload > 0xFFFF)
	reload = 0xFFFF;        /* almost full count (0 is full count) */

    clock_lock();
    if (timer0_running) {
	/*
	 * Timer0 is still counting down from a previous one-shot.  Only
	 * reprogram it if the new deadline is sooner than what remains.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);     /* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);              /* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);      /* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload);         /* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));  /* msb */
	}
    } else {
	/* Timer0 idle: arm a fresh one-shot. */
	timer0_running = 1;
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload);             /* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));      /* msb */
    }
    clock_unlock();
}
376
377 /*
378  * DELAY(usec)       - Spin for the specified number of microseconds.
379  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
380  *                     but do a thread switch in the loop
381  *
382  * Relies on timer 1 counting down from (cputimer_freq / hz)
383  * Note: timer had better have been programmed before this is first used!
384  */
/*
 * Common spin loop for DELAY()/DRIVERSLEEP(): spin for n microseconds
 * against sys_cputimer, optionally lwkt_switch()ing each iteration.
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self-test: exercise DELAY over several magnitudes. */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		/* Consume elapsed ticks; clamp any apparent backstep to 0. */
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
444
445 /*
446  * DELAY() never switches.
447  */
void
DELAY(int n)
{
	/* Spin for n microseconds without ever switching threads. */
	DODELAY(n, 0);
}
453
454 /*
455  * Returns non-zero if the specified time period has elapsed.  Call
456  * first with last_clock set to 0.
457  */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	/* First call: arm the timer and record the starting count. */
	if (tdd->started == 0) {
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	/*
	 * Advance last_clock by only the whole microseconds consumed so
	 * the fractional-tick remainder carries into the next call
	 * instead of being lost.
	 */
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}
478
479
480 /*
481  * DRIVERSLEEP() does not switch if called with a spinlock held or
482  * from a hard interrupt.
483  */
484 void
485 DRIVERSLEEP(int usec)
486 {
487         globaldata_t gd = mycpu;
488
489         if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
490                 DODELAY(usec, 0);
491         } else {
492                 DODELAY(usec, 1);
493         }
494 }
495
/*
 * Callout handler: silence the speaker and give timer2 back.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
503
504 int
505 sysbeep(int pitch, int period)
506 {
507         if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
508                 return(-1);
509         if (sysbeep_enable == 0)
510                 return(-1);
511         /*
512          * Nobody else is using timer2, we do not need the clock lock
513          */
514         outb(TIMER_CNTR2, pitch);
515         outb(TIMER_CNTR2, (pitch>>8));
516         if (!beeping) {
517                 /* enable counter2 output to speaker */
518                 outb(IO_PPI, inb(IO_PPI) | 3);
519                 beeping = period;
520                 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
521         }
522         return (0);
523 }
524
525 /*
526  * RTC support routines
527  */
528
/*
 * Read one MC146818A register.  The inb(0x84) reads are the classic
 * ISA "recovery delay" dummy accesses between RTC port operations.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);      /* select register */
	inb(0x84);              /* I/O recovery delay */
	val = inb(IO_RTC + 1);  /* read data */
	inb(0x84);
	crit_exit();
	return (val);
}
542
/*
 * Write one MC146818A register, with I/O recovery delays between
 * port accesses (see rtcin()).
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);      /* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);  /* write data */
	inb(0x84);              /* XXX work around wrong order in rtcin() */
	crit_exit();
}
554
static __inline int
readrtc(int port)
{
	/* RTC registers are BCD-encoded; convert to binary on read. */
	return(bcd2bin(rtcin(port)));
}
560
/*
 * Measure the current cputimer frequency (and the TSC frequency if a
 * TSC is present) against the RTC's one-second update cycle.  Returns
 * the measured cputimer frequency, or the default i8254 frequency on
 * failure.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	sysclock_t tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
		kprintf("Calibrating clock(s) ...\n");
	/* Bail out if the RTC reports invalid (lost power) state. */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mC146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;            /* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (sysclock_t)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* |1 guarantees a non-zero result even if tsc_frequency is 0. */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %lu Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %lu Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
650
/*
 * (Re)program i8254 timer0 as the one-shot interrupt timer and register
 * the i8254 cputimer / interrupt-cputimer with the framework.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);   /* lsb */
	outb(TIMER_CNTR0, 0);   /* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
678
/*
 * cputimer construct hook: pick timer1 or timer2 as the free-running
 * wall timer (tunable hw.i8254.walltimer, default 2), program it in
 * rate-generator mode with a full 2^16 count, and carry the old clock
 * base forward.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* Round the old clock up to a 16-bit boundary for the new base. */
	timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);  /* lsb */
	outb(i8254_walltimer_cntr, 0);  /* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);  /* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
718
719 static void
720 i8254_cputimer_destruct(struct cputimer *timer)
721 {
722         switch(timer->type) {
723         case CPUTIMER_8254_SEL1:
724             timer1_state = RELEASED;
725             break;
726         case CPUTIMER_8254_SEL2:
727             timer2_state = RELEASED;
728             break;
729         default:
730             break;
731         }
732         timer->type = 0;
733 }
734
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
743
744 /*
745  * Restore all the timers.
746  *
747  * This function is called to resynchronize our core timekeeping after a
748  * long halt, e.g. from apm_default_resume() and friends.  It is also 
749  * called if after a BIOS call we have detected munging of the 8254.
750  * It is necessary because cputimer_count() counter's delta may have grown
751  * too large for nanouptime() and friends to handle, or (in the case of 8254
752  * munging) might cause the SYSTIMER code to prematurely trigger.
753  */
void
timer_restore(void)
{
	/* Critical section keeps clock interrupts away mid-reprogram. */
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();        /* restore timer_freq and hz */
	rtc_restore();                  /* reenable RTC interrupts */
	crit_exit();
}
763
764 #define MAX_MEASURE_RETRIES     100
765
/*
 * Take one low-jitter (TSC, cputimer-count) sample pair.  Retries the
 * read while the TSC delta around the cputimer read exceeds a threshold
 * derived from the expected timer read latency (relaxed to 2x after
 * half the retries are burned).  Returns the TSC value before the read;
 * the measured latency, the cputimer count, and the retry count are
 * returned through the out parameters.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		/* Loosen the acceptance threshold after many retries. */
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
793
/*
 * Calibrate the TSC frequency against the current cputimer over a window
 * of `usecs' microseconds.  The fast path takes duplicate start/end
 * sample pairs 20ms apart and averages the two derived frequencies to
 * reduce latency-induced error; the slow path is a plain TSC delta
 * over a DELAY().  Returns the estimated TSC frequency in Hz.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		/* Two start samples, the window, then two end samples. */
		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate the TSC deltas for the measured read latency. */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
848
849 /*
850  * Initialize 8254 timer 0 early so that it can be used in DELAY().
851  */
852 void
853 startrtclock(void)
854 {
855         const timecounter_init_t **list;
856         sysclock_t delta, freq;
857
858         callout_init_mp(&sysbeepstop_ch);
859
860         /* 
861          * Can we use the TSC?
862          *
863          * NOTE: If running under qemu, probably a good idea to force the
864          *       TSC because we are not likely to detect it as being
865          *       invariant or mpsyncd if you don't.  This will greatly
866          *       reduce SMP contention.
867          */
868         if (cpu_feature & CPUID_TSC) {
869                 tsc_present = 1;
870                 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
871
872                 if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
873                      cpu_vendor_id == CPU_VENDOR_AMD) &&
874                     cpu_exthigh >= 0x80000007) {
875                         u_int regs[4];
876
877                         do_cpuid(0x80000007, regs);
878                         if (regs[3] & 0x100)
879                                 tsc_invariant = 1;
880                 }
881         } else {
882                 tsc_present = 0;
883         }
884
885         /*
886          * Initial RTC state, don't do anything unexpected
887          */
888         writertc(RTC_STATUSA, rtc_statusa);
889         writertc(RTC_STATUSB, RTCSB_24HR);
890
891         SET_FOREACH(list, timecounter_init_set) {
892                 if ((*list)->configure != NULL)
893                         (*list)->configure();
894         }
895
896         /*
897          * If tsc_frequency is already initialized now, and a flag is set
898          * that i8254 timer is unneeded, we are done.
899          */
900         if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
901                 goto done;
902
903         /*
904          * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 
905          * generate an interrupt, which we will ignore for now.
906          *
907          * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
908          * (so it counts a full 2^16 and repeats).  We will use this timer
909          * for our counting.
910          */
911         if (i8254_cputimer_disable == 0)
912                 i8254_restore();
913
914         kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);
915
916         /*
917          * When booting without verbose messages, it's pointless to run the
918          * calibrate_clocks() calibration code, when we don't use the
919          * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
921          */
922         if (i8254_cputimer_disable != 0 ||
923             (calibrate_timers_with_rtc == 0 && !bootverbose)) {
924                 goto skip_rtc_based;
925         }
926
927         freq = calibrate_clocks();
928 #ifdef CLK_CALIBRATION_LOOP
929         if (bootverbose) {
930                 int c;
931
932                 cnpoll(TRUE);
933                 kprintf("Press a key on the console to "
934                         "abort clock calibration\n");
935                 while ((c = cncheckc()) == -1 || c == NOKEY)
936                         calibrate_clocks();
937                 cnpoll(FALSE);
938         }
939 #endif
940
941         /*
942          * Use the calibrated i8254 frequency if it seems reasonable.
943          * Otherwise use the default, and don't use the calibrated i586
944          * frequency.
945          */
946         delta = freq > i8254_cputimer.freq ? 
947                 freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
948         if (delta < i8254_cputimer.freq / 100) {
949                 if (calibrate_timers_with_rtc == 0) {
950                         kprintf(
951 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
952                         freq = i8254_cputimer.freq;
953                 }
954                 /*
955                  * NOTE:
956                  * Interrupt timer's freq must be adjusted
957                  * before we change the cuptimer's frequency.
958                  */
959                 i8254_cputimer_intr.freq = freq;
960                 cputimer_set_frequency(&i8254_cputimer, freq);
961         } else {
962                 if (bootverbose)
963                         kprintf("%lu Hz differs from default of %lu Hz "
964                                 "by more than 1%%\n",
965                                 freq, i8254_cputimer.freq);
966                 tsc_frequency = 0;
967         }
968
969         if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
970                 kprintf("hw.calibrate_timers_with_rtc not "
971                         "set - using old calibration method\n");
972                 tsc_frequency = 0;
973         }
974
975 skip_rtc_based:
976         if (tsc_present && tsc_frequency == 0) {
977                 u_int cnt;
978                 u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
979                 int i;
980
981                 for (i = 0; i < 10; i++) {
982                         /* Warm up */
983                         (void)sys_cputimer->count();
984                 }
985                 for (i = 0; i < 100; i++) {
986                         u_int64_t old_tsc, new_tsc;
987
988                         old_tsc = rdtsc_ordered();
989                         (void)sys_cputimer->count();
990                         new_tsc = rdtsc_ordered();
991                         cputime_latency_tsc += (new_tsc - old_tsc);
992                         if (max < (new_tsc - old_tsc))
993                                 max = new_tsc - old_tsc;
994                         if (min == 0 || min > (new_tsc - old_tsc))
995                                 min = new_tsc - old_tsc;
996                 }
997                 cputime_latency_tsc /= 100;
998                 kprintf(
999                     "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
1000                     cputime_latency_tsc, min, max);
1001                 /* XXX Instead of this, properly filter out outliers. */
1002                 cputime_latency_tsc = min;
1003
1004                 if (calibrate_test > 0) {
1005                         u_int64_t values[20], avg = 0;
1006                         for (i = 1; i <= 20; i++) {
1007                                 u_int64_t freq;
1008
1009                                 freq = do_calibrate_cputimer(i * 100 * 1000,
1010                                     cputime_latency_tsc);
1011                                 values[i - 1] = freq;
1012                         }
1013                         /* Compute an average TSC for the 1s to 2s delays. */
1014                         for (i = 10; i < 20; i++)
1015                                 avg += values[i];
1016                         avg /= 10;
1017                         for (i = 0; i < 20; i++) {
1018                                 kprintf("%ums: %lu (Diff from average: %ld)\n",
1019                                     (i + 1) * 100, values[i],
1020                                     (int64_t)(values[i] - avg));
1021                         }
1022                 }
1023
1024                 if (calibrate_tsc_fast > 0) {
1025                         /* HPET would typically be >10MHz */
1026                         if (sys_cputimer->freq >= 10000000)
1027                                 cnt = 200000;
1028                         else
1029                                 cnt = 500000;
1030                 } else {
1031                         cnt = 1000000;
1032                 }
1033
1034                 tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
1035                 if (bootverbose && calibrate_timers_with_rtc) {
1036                         kprintf("TSC clock: %jd Hz (Method B)\n",
1037                             (intmax_t)tsc_frequency);
1038                 }
1039         }
1040
1041 done:
1042         if (tsc_present) {
1043                 kprintf("TSC%s clock: %jd Hz\n",
1044                     tsc_invariant ? " invariant" : "",
1045                     (intmax_t)tsc_frequency);
1046         }
1047         tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
1048
1049         EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
1050                               NULL, SHUTDOWN_PRI_LAST);
1051 }
1052
1053 /*
1054  * Sync the time of day back to the RTC on shutdown, but only if
1055  * we have already loaded it and have not crashed.
1056  */
1057 static void
1058 resettodr_on_shutdown(void *arg __unused)
1059 {
1060         if (rtc_loaded && panicstr == NULL) {
1061                 resettodr();
1062         }
1063 }
1064
1065 /*
1066  * Initialize the time of day register, based on the time base which is, e.g.
1067  * from a filesystem.
1068  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/*
	 * Seed the system clock from the caller-supplied time base
	 * (e.g. a filesystem timestamp) before consulting the RTC.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	/*
	 * The crit_exit()/crit_enter() cycle inside the loop lets pending
	 * interrupts be serviced while we spin waiting for the window.
	 */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	/* Two-digit year: map 00-69 to 2000-2069, 70-99 to 1970-1999 */
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Days contributed by the completed months of the current year */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	/* Account for Feb 29 if we are past February in a leap year */
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* Days contributed by all complete years since the epoch */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert local time to UTC unless the CMOS clock runs on UTC */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only step the system clock if it is off by 2 seconds or more */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1139
1140 /*
1141  * Write system time back to RTC
1142  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	/* Honor the tunable that disables RTC writes entirely */
	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* Jan 1 1970 was a Thursday, hence the +4 weekday offset */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	/* Strip off whole years, leaving the day-of-year in tm */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Strip off whole months; m counts from 0 (January) here */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));		/* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));		/* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1199
/*
 * Verify that 8254 timer0 interrupts are actually delivered through the
 * I/O APIC on the given irq.  Forces a timer interrupt, then spins for
 * 1/100 second waiting for this cpu's interrupt counter to advance.
 *
 * Returns 0 on success, ENOENT if no interrupt was observed.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery...");

	/* Reload timer0 to fire ~2us from now, then busy-wait 10ms */
	i8254_intr_reload(cti, sys_cputimer->fromus(2));
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		; /* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
		kprintf(" failed\n");
		return ENOENT;
	} else {
		kprintf(" success\n");
	}
	return 0;
}
1234
1235 /*
1236  * Start both clocks running.  DragonFly note: the stat clock is no longer
1237  * used.  Instead, 8254 based systimers are used for all major clock
1238  * interrupts.
1239  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Must run on the BSP */
	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		/* Locate the I/O APIC pin wired to legacy irq 0 */
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
			/*
			 * No direct routing found; fall back to mixed
			 * mode (ExtINT through the I/O APIC).  Also
			 * re-entered below if the trial fails.
			 */
mixed_mode_setup:
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* Legacy PIC path: irq 0 directly */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		/* Confirm interrupts really arrive on the chosen pin */
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1328
1329 void
1330 setstatclockrate(int newhz)
1331 {
1332         if (newhz == RTC_PROFRATE)
1333                 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1334         else
1335                 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1336         writertc(RTC_STATUSA, rtc_statusa);
1337 }
1338
#if 0
/*
 * Disabled: would expose the raw TSC as a timecounter source.
 * Kept for reference only.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1346
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Circular buffer of (tsc, tag) pairs, exported via debug.timestamp */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp entry: the low 32 bits of the TSC followed by the
 * caller-supplied tag x.  Entries are stored as adjacent pairs; the
 * slot after the newest pair is zeroed to mark the ring's logical end.
 * Not interrupt- or SMP-safe; intended for ad-hoc debugging only.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();	/* NOTE: TSC truncated to 32 bits */
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1365
1366 /*
1367  *
1368  */
1369
1370 static int
1371 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1372 {
1373     sysclock_t count;
1374     uint64_t tscval;
1375     char buf[32];
1376
1377     crit_enter();
1378     if (sys_cputimer == &i8254_cputimer)
1379         count = sys_cputimer->count();
1380     else
1381         count = 0;
1382     if (tsc_present)
1383         tscval = rdtsc();
1384     else
1385         tscval = 0;
1386     crit_exit();
1387     ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1388     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1389 }
1390
/*
 * Shared state for the TSC MP synchronization test.  One AP thread per
 * cpu repeatedly samples its TSC into tsc_saved[]; the counters below
 * coordinate start and completion.  Cache-aligned to limit false
 * sharing between the control fields and the per-cpu samples.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of AP threads ready */
	volatile int		tsc_done_cnt;	/* # of AP threads finished */
	volatile int		tsc_command;	/* set non-zero to start test */
	volatile int		unused01[5];	/* pad */
	struct {
		uint64_t	v;		/* last TSC sample on this cpu */
		uint64_t	unused02;	/* pad entry to 16 bytes */
	} tsc_saved[MAXCPU];
} __cachealign;
1401
#if 0
/*
 * Disabled, stale variant of the TSC MP synchronization test.
 *
 * NOTE(review): this code references 'arg' and 'struct tsc_mpsync_thr',
 * neither of which is declared in the current file (the parameter is
 * named 'info'), so it would not compile if re-enabled without rework.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1466
1467 #define TSC_TEST_COUNT          50000
1468
/*
 * Per-cpu worker thread for the TSC concurrency test.  Signals ready,
 * waits for the start command, then samples the TSC in a tight loop
 * until either TSC_TEST_COUNT iterations complete or another cpu
 * finishes first.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT loops or until tsc_done_cnt != 0
	 * (another cpu has finished its test), then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1497
1498 static void
1499 tsc_mpsync_test(void)
1500 {
1501         enum { TSCOK, TSCNEG, TSCSPAN } error = TSCOK;
1502         int cpu;
1503         int try;
1504
1505         if (!tsc_invariant) {
1506                 /* Not even invariant TSC */
1507                 kprintf("TSC is not invariant, "
1508                         "no further tests will be performed\n");
1509                 return;
1510         }
1511
1512         if (ncpus == 1) {
1513                 /* Only one CPU */
1514                 tsc_mpsync = 1;
1515                 return;
1516         }
1517
1518         /*
1519          * Forcing can be used w/qemu to reduce contention
1520          */
1521         TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1522
1523         if (tsc_mpsync == 0) {
1524                 switch (cpu_vendor_id) {
1525                 case CPU_VENDOR_INTEL:
1526                         /*
1527                          * Intel probably works
1528                          */
1529                         break;
1530
1531                 case CPU_VENDOR_AMD:
1532                         /*
1533                          * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1534                          * architectures) we have to watch out for
1535                          * Erratum 778:
1536                          *     "Processor Core Time Stamp Counters May
1537                          *      Experience Drift"
1538                          * This Erratum is only listed for cpus in Family
1539                          * 15h < Model 30h and for 16h < Model 30h.
1540                          *
1541                          * AMD < Bulldozer probably doesn't work
1542                          */
1543                         if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1544                             CPUID_TO_FAMILY(cpu_id) == 0x16) {
1545                                 if (CPUID_TO_MODEL(cpu_id) < 0x30)
1546                                         return;
1547                         } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1548                                 return;
1549                         }
1550                         break;
1551
1552                 default:
1553                         /* probably won't work */
1554                         return;
1555                 }
1556         } else if (tsc_mpsync < 0) {
1557                 kprintf("TSC MP synchronization test is disabled\n");
1558                 tsc_mpsync = 0;
1559                 return;
1560         }
1561
1562         /*
1563          * Test even if forced to 1 above.  If forced, we will use the TSC
1564          * even if the test fails.  (set forced to -1 to disable entirely).
1565          */
1566         kprintf("TSC testing MP synchronization ...\n");
1567         kprintf("TSC testing MP: NOTE! CPU pwrsave will inflate latencies!\n");
1568
1569         /*
1570          * Test that the TSC is monotonically increasing across CPU
1571          * switches.  Otherwise time will get really messed up if the
1572          * TSC is selected as the timebase.
1573          *
1574          * Test 4 times
1575          */
1576         for (try = 0; tsc_frequency && try < 4; ++try) {
1577                 tsc_uclock_t last;
1578                 tsc_uclock_t next;
1579                 tsc_sclock_t delta;
1580                 tsc_sclock_t lo_delta = 0x7FFFFFFFFFFFFFFFLL;
1581                 tsc_sclock_t hi_delta = -0x7FFFFFFFFFFFFFFFLL;
1582
1583                 last = rdtsc();
1584                 for (cpu = 0; cpu < ncpus; ++cpu) {
1585                         lwkt_migratecpu(cpu);
1586                         next = rdtsc();
1587                         if (cpu == 0) {
1588                                 last = next;
1589                                 continue;
1590                         }
1591
1592                         delta = next - last;
1593                         if (delta < 0) {
1594                                 kprintf("TSC cpu-delta NEGATIVE: "
1595                                         "cpu %d to %d (%ld)\n",
1596                                         cpu - 1, cpu, delta);
1597                                 error = TSCNEG;
1598                         }
1599                         if (lo_delta > delta)
1600                                 lo_delta = delta;
1601                         if (hi_delta < delta)
1602                                 hi_delta = delta;
1603                         last = next;
1604                 }
1605                 last = rdtsc();
1606                 for (cpu = ncpus - 2; cpu >= 0; --cpu) {
1607                         lwkt_migratecpu(cpu);
1608                         next = rdtsc();
1609                         delta = next - last;
1610                         if (delta <= 0) {
1611                                 kprintf("TSC cpu-delta WAS NEGATIVE! "
1612                                         "cpu %d to %d (%ld)\n",
1613                                         cpu + 1, cpu, delta);
1614                                 error = TSCNEG;
1615                         }
1616                         if (lo_delta > delta)
1617                                 lo_delta = delta;
1618                         if (hi_delta < delta)
1619                                 hi_delta = delta;
1620                         last = next;
1621                 }
1622                 kprintf("TSC cpu-delta test complete, %ldnS to %ldnS ",
1623                         muldivu64(lo_delta, 1000000000, tsc_frequency),
1624                         muldivu64(hi_delta, 1000000000, tsc_frequency));
1625                 if (error != TSCOK) {
1626                         kprintf("FAILURE\n");
1627                         break;
1628                 }
1629                 kprintf("SUCCESS\n");
1630         }
1631
1632         /*
1633          * Test TSC MP synchronization on APs.
1634          *
1635          * Test 4 times.
1636          */
1637         for (try = 0; tsc_frequency && try < 4; ++try) {
1638                 struct tsc_mpsync_info info;
1639                 uint64_t last;
1640                 int64_t xworst;
1641                 int64_t xdelta;
1642                 int64_t delta;
1643
1644                 bzero(&info, sizeof(info));
1645
1646                 for (cpu = 0; cpu < ncpus; ++cpu) {
1647                         thread_t td;
1648                         lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1649                                     NULL, TDF_NOSTART, cpu,
1650                                     "tsc mpsync %d", cpu);
1651                         lwkt_setpri_initial(td, curthread->td_pri);
1652                         lwkt_schedule(td);
1653                 }
1654                 while (info.tsc_ready_cnt != ncpus)
1655                         lwkt_force_switch();
1656
1657                 /*
1658                  * All threads are ready, start the test and wait for
1659                  * completion.
1660                  */
1661                 info.tsc_command = 1;
1662                 while (info.tsc_done_cnt != ncpus)
1663                         lwkt_force_switch();
1664
1665                 /*
1666                  * Process results
1667                  */
1668                 last = info.tsc_saved[0].v;
1669                 delta = 0;
1670                 xworst = 0;
1671                 for (cpu = 0; cpu < ncpus; ++cpu) {
1672                         xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1673                         last = info.tsc_saved[cpu].v;
1674                         if (xdelta < 0)
1675                                 xdelta = -xdelta;
1676                         if (xworst < xdelta)
1677                                 xworst = xdelta;
1678                         delta += xdelta;
1679
1680                 }
1681
1682                 /*
1683                  * Result from attempt.  Break-out if we succeeds, otherwise
1684                  * try again (up to 4 times).  This might be in a VM so we
1685                  * need to be robust.
1686                  */
1687                 kprintf("TSC cpu concurrency test complete, worst=%ldns, "
1688                         "avg=%ldns ",
1689                         muldivu64(xworst, 1000000000, tsc_frequency),
1690                         muldivu64(delta / ncpus, 1000000000, tsc_frequency));
1691                 if (delta / ncpus > tsc_frequency / 100) {
1692                         kprintf("FAILURE\n");
1693                 }
1694                 if (delta / ncpus < tsc_frequency / 100000) {
1695                         kprintf("SUCCESS\n");
1696                         if (error == TSCOK)
1697                                 tsc_mpsync = 1;
1698                         break;
1699                 }
1700                 kprintf("INDETERMINATE\n");
1701         }
1702
1703         if (tsc_mpsync)
1704                 kprintf("TSC is MP synchronized\n");
1705         else
1706                 kprintf("TSC is not MP synchronized\n");
1707 }
/* Run the TSC MP synchronization test once SMP startup has completed */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* Read-only status nodes: hw.i8254.* and hw.tsc_* */
static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");