kernel - Factor out TSC cputimer into common x86_64 code, use for vkernel.
author Imre Vadász <imre@vdsz.com>
Tue, 25 Dec 2018 15:02:38 +0000 (16:02 +0100)
committer Imre Vadász <imre@vdsz.com>
Thu, 27 Dec 2018 09:20:35 +0000 (10:20 +0100)
* This adds a command line flag -T to the vkernel, to force disable use of
  the TSC cputimer.

* By default the TSC will be used as a cputimer for the vkernel when the
  TSC is invariant and mpsync according to the hw.tsc_invariant and
  hw.tsc_mpsync sysctl values of the host.

share/man/man7/vkernel.7
sys/cpu/x86_64/misc/cputimer_tsc.c [new file with mode: 0644]
sys/platform/pc64/conf/files
sys/platform/pc64/isa/clock.c
sys/platform/vkernel64/conf/files
sys/platform/vkernel64/platform/init.c
sys/platform/vkernel64/platform/systimer.c

index e0cdf66..e934659 100644 (file)
@@ -279,12 +279,16 @@ swap space until the vkernel is shut down.
 Boot into single-user mode.
 .It Fl t
 Tell the vkernel to use a precise host timer when calculating clock values.
-This will impose higher overhead on the vkernel as it will have to make
-a system call to the real host every time it wants to get the time.
+If the TSC isn't used, this will impose higher overhead on the vkernel as it
+will have to make a system call to the real host every time it wants to get
+the time.
 However, the more precise timer might be necessary for your application.
 .Pp
-By default, the vkernel uses an imprecise (host-tick-resolution) timer
-which uses a user-mapped kernel page and does not have any syscall overhead.
+By default, the vkernel uses the TSC cpu timer if possible, or an imprecise
+(host-tick-resolution) timer which uses a user-mapped kernel page and does
+not have any syscall overhead.
+.It Fl T
+Force the vkernel to not use the TSC cpu timer.
 .It Fl U
 Enable writing to kernel memory and module loading.
 By default, those are disabled for security reasons.
diff --git a/sys/cpu/x86_64/misc/cputimer_tsc.c b/sys/cpu/x86_64/misc/cputimer_tsc.c
new file mode 100644 (file)
index 0000000..1cfb84f
--- /dev/null
@@ -0,0 +1,192 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 2008 The DragonFly Project.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz and Don Ahn.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     from: @(#)clock.c       7.2 (Berkeley) 5/12/91
+ * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
+ */
+
+/*
+ * TSC cputimer.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/systimer.h>
+#include <sys/globaldata.h>
+
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+
+static sysclock_t tsc_cputimer_count_mfence(void);     /* non-Intel */
+static sysclock_t tsc_cputimer_count_lfence(void);     /* Intel */
+static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
+
+static struct cputimer tsc_cputimer = { /* TSC-backed system cputimer */
+    .next              = SLIST_ENTRY_INITIALIZER,
+    .name              = "TSC",
+    .pri               = CPUTIMER_PRI_TSC,
+    .type              = CPUTIMER_TSC,
+    .count             = NULL, /* set in tsc_cputimer_register() */
+    .fromhz            = cputimer_default_fromhz,
+    .fromus            = cputimer_default_fromus,
+    .construct         = tsc_cputimer_construct,
+    .destruct          = cputimer_default_destruct,
+    .freq              = 0     /* tsc_frequency scaled below FREQMAX */
+};
+
+static struct cpucounter tsc_cpucounter = { /* raw (unscaled) TSC counter */
+    .freq              = 0,    /* set to tsc_frequency at registration */
+    .count             = NULL, /* set in tsc_cputimer_register() */
+    .flags             = 0,    /* MPSYNC flag added when tsc_mpsync */
+    .prio              = CPUCOUNTER_PRIO_TSC,
+    .type              = CPUCOUNTER_TSC
+};
+
+#ifdef _KERNEL_VIRTUAL
+extern int allow_tsc_timer;    /* cleared by the vkernel -T flag */
+
+#define TSC_CPUTIMER_FREQMAX   2000000 /* 2MHz cputimer freq cap */
+#else
+#define TSC_CPUTIMER_FREQMAX   128000000       /* 128MHz cputimer freq cap */
+#endif
+
+static int tsc_cputimer_shift; /* right shift applied to rdtsc() values */
+
+static void    /* Rebase the timer so count() resumes at oldclock. */
+tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
+{
+       timer->base = 0;        /* count() adds base; zero it to read raw */
+       timer->base = oldclock - timer->count();
+}
+
+static __inline sysclock_t     /* Scaled TSC reading plus timer base. */
+tsc_cputimer_count(void)
+{
+       uint64_t tsc;
+
+       tsc = rdtsc();
+       tsc >>= tsc_cputimer_shift;     /* scale down to tsc_cputimer.freq */
+
+       return (tsc + tsc_cputimer.base);
+}
+
+static sysclock_t      /* cputimer .count used on Intel CPUs. */
+tsc_cputimer_count_lfence(void)
+{
+       cpu_lfence();   /* fence issued before rdtsc() */
+       return tsc_cputimer_count();
+}
+
+static sysclock_t      /* cputimer .count used on non-Intel CPUs. */
+tsc_cputimer_count_mfence(void)
+{
+       cpu_mfence();   /* fence issued before rdtsc() */
+       return tsc_cputimer_count();
+}
+
+static uint64_t        /* cpucounter .count used on Intel CPUs. */
+tsc_cpucounter_count_lfence(void)
+{
+
+       cpu_lfence();   /* fence issued before rdtsc() */
+       return (rdtsc());       /* raw TSC: no shift, no base */
+}
+
+static uint64_t        /* cpucounter .count used on non-Intel CPUs. */
+tsc_cpucounter_count_mfence(void)
+{
+
+       cpu_mfence();   /* fence issued before rdtsc() */
+       return (rdtsc());       /* raw TSC: no shift, no base */
+}
+
+static void    /* SYSINIT hook: register the TSC cputimer and cpucounter. */
+tsc_cputimer_register(void)
+{
+       uint64_t freq;
+       int enable = 1; /* default; hw.tsc_cputimer_enable may override */
+
+#ifdef _KERNEL_VIRTUAL
+       if (!allow_tsc_timer)   /* vkernel started with -T */
+               return;
+#endif
+
+       if (!tsc_mpsync) {      /* cputimer is never registered on this path */
+#ifndef _KERNEL_VIRTUAL
+               if (tsc_invariant) {
+                       /* Per-cpu cpucounter still works. */
+                       goto regcnt;
+               }
+#endif
+               return; /* neither cputimer nor cpucounter registered */
+       }
+
+       TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
+       if (!enable)
+               return;
+
+       freq = tsc_frequency;
+       while (freq > TSC_CPUTIMER_FREQMAX) {   /* halve until within the cap */
+               freq >>= 1;
+               ++tsc_cputimer_shift;   /* same shift is applied in count() */
+       }
+       kprintf("TSC: cputimer freq %ju, shift %d\n",
+           (uintmax_t)freq, tsc_cputimer_shift);
+
+       tsc_cputimer.freq = freq;
+
+       if (cpu_vendor_id == CPU_VENDOR_INTEL)
+               tsc_cputimer.count = tsc_cputimer_count_lfence;
+       else
+               tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
+
+       cputimer_register(&tsc_cputimer);
+       cputimer_select(&tsc_cputimer, 0);
+
+       tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC; /* mpsync path only */
+#ifndef _KERNEL_VIRTUAL
+regcnt:
+#endif
+       tsc_cpucounter.freq = tsc_frequency;    /* unscaled, unlike cputimer */
+       if (cpu_vendor_id == CPU_VENDOR_INTEL) {
+               tsc_cpucounter.count =
+                   tsc_cpucounter_count_lfence;
+       } else {
+               tsc_cpucounter.count =
+                   tsc_cpucounter_count_mfence; /* safe bet */
+       }
+       cpucounter_register(&tsc_cpucounter);
+}
+SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
+       tsc_cputimer_register, NULL);
index a44de54..6956929 100644 (file)
@@ -119,6 +119,7 @@ vfs/smbfs/smbfs_vnops.c             optional        smbfs
 
 cpu/x86_64/misc/atomic.c               standard                        \
        compile-with    "${NORMAL_C} ${empty(DEFINED_PROF):?-fomit-frame-pointer:} ${WERROR}"
+cpu/x86_64/misc/cputimer_tsc.c         standard
 platform/pc64/x86_64/autoconf.c        standard
 platform/pc64/x86_64/mpboot.S  standard
 
index 7416850..096d65e 100644 (file)
@@ -162,31 +162,6 @@ static struct cputimer     i8254_cputimer = {
     .freq              = TIMER_FREQ
 };
 
-static sysclock_t tsc_cputimer_count_mfence(void);
-static sysclock_t tsc_cputimer_count_lfence(void);
-static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
-
-static struct cputimer tsc_cputimer = {
-    .next              = SLIST_ENTRY_INITIALIZER,
-    .name              = "TSC",
-    .pri               = CPUTIMER_PRI_TSC,
-    .type              = CPUTIMER_TSC,
-    .count             = NULL, /* determined later */
-    .fromhz            = cputimer_default_fromhz,
-    .fromus            = cputimer_default_fromus,
-    .construct         = tsc_cputimer_construct,
-    .destruct          = cputimer_default_destruct,
-    .freq              = 0     /* determined later */
-};
-
-static struct cpucounter tsc_cpucounter = {
-    .freq              = 0,    /* determined later */
-    .count             = NULL, /* determined later */
-    .flags             = 0,    /* adjusted later */
-    .prio              = CPUCOUNTER_PRIO_TSC,
-    .type              = CPUCOUNTER_TSC
-};
-
 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
@@ -1647,109 +1622,6 @@ tsc_mpsync_test(void)
 }
 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
 
-#define TSC_CPUTIMER_FREQMAX   128000000       /* 128Mhz */
-
-static int tsc_cputimer_shift;
-
-static void
-tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
-{
-       timer->base = 0;
-       timer->base = oldclock - timer->count();
-}
-
-static __inline sysclock_t
-tsc_cputimer_count(void)
-{
-       uint64_t tsc;
-
-       tsc = rdtsc();
-       tsc >>= tsc_cputimer_shift;
-
-       return (tsc + tsc_cputimer.base);
-}
-
-static sysclock_t
-tsc_cputimer_count_lfence(void)
-{
-       cpu_lfence();
-       return tsc_cputimer_count();
-}
-
-static sysclock_t
-tsc_cputimer_count_mfence(void)
-{
-       cpu_mfence();
-       return tsc_cputimer_count();
-}
-
-static uint64_t
-tsc_cpucounter_count_lfence(void)
-{
-
-       cpu_lfence();
-       return (rdtsc());
-}
-
-static uint64_t
-tsc_cpucounter_count_mfence(void)
-{
-
-       cpu_mfence();
-       return (rdtsc());
-}
-
-static void
-tsc_cputimer_register(void)
-{
-       uint64_t freq;
-       int enable = 1;
-
-       if (!tsc_mpsync) {
-               if (tsc_invariant) {
-                       /* Per-cpu cpucounter still works. */
-                       goto regcnt;
-               }
-               return;
-       }
-
-       TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
-       if (!enable)
-               return;
-
-       freq = tsc_frequency;
-       while (freq > TSC_CPUTIMER_FREQMAX) {
-               freq >>= 1;
-               ++tsc_cputimer_shift;
-       }
-       kprintf("TSC: cputimer freq %ju, shift %d\n",
-           (uintmax_t)freq, tsc_cputimer_shift);
-
-       tsc_cputimer.freq = freq;
-
-       if (cpu_vendor_id == CPU_VENDOR_INTEL)
-               tsc_cputimer.count = tsc_cputimer_count_lfence;
-       else
-               tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
-
-       cputimer_register(&tsc_cputimer);
-       cputimer_select(&tsc_cputimer, 0);
-
-       tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
-regcnt:
-       tsc_cpucounter.freq = tsc_frequency;
-       if (cpu_vendor_id == CPU_VENDOR_INTEL) {
-               tsc_cpucounter.count =
-                   tsc_cpucounter_count_lfence;
-       } else {
-               tsc_cpucounter.count =
-                   tsc_cpucounter_count_mfence; /* safe bet */
-       }
-       cpucounter_register(&tsc_cpucounter);
-}
-SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
-       tsc_cputimer_register, NULL);
-
 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
            "frequency");
index af2ffad..5732122 100644 (file)
@@ -14,6 +14,7 @@ vfs/smbfs/smbfs_vfsops.c      optional        smbfs
 vfs/smbfs/smbfs_vnops.c                optional        smbfs
 cpu/x86_64/misc/atomic.c       standard                                \
        compile-with    "${NORMAL_C} ${empty(DEFINED_PROF):?-fomit-frame-pointer:} ${WERROR}"
+cpu/x86_64/misc/cputimer_tsc.c standard
 platform/vkernel64/x86_64/autoconf.c   standard
 platform/vkernel64/x86_64/mp.c         standard                        \
        compile-with    "${NORMAL_C} -pthread -I/usr/include ${WERROR}"
index da95a9d..fdfc87f 100644 (file)
@@ -124,6 +124,7 @@ int vkernel_b_arg;  /* no of logical CPU bits - only SMP */
 int vkernel_B_arg;     /* no of core bits - only SMP */
 int vmm_enabled;       /* VMM HW assisted enable */
 int use_precise_timer = 0;     /* use a precise timer (more expensive) */
+int allow_tsc_timer = 1;       /* use the TSC cpu timer if possible */
 struct privatespace *CPU_prvspace;
 
 tsc_uclock_t tsc_frequency;
@@ -267,7 +268,7 @@ main(int ac, char **av)
        if (ac < 2)
                usage_help(false);
 
-       while ((c = getopt(ac, av, "c:hsvztl:m:n:r:R:e:i:p:I:Ud")) != -1) {
+       while ((c = getopt(ac, av, "c:hsvztTl:m:n:r:R:e:i:p:I:Ud")) != -1) {
                switch(c) {
                case 'd':
                        dflag = 1;
@@ -318,6 +319,9 @@ main(int ac, char **av)
                case 't':
                        use_precise_timer = 1;
                        break;
+               case 'T':
+                       allow_tsc_timer = 0;
+                       break;
                case 'v':
                        bootverbose = 1;
                        break;
@@ -1575,7 +1579,7 @@ static
 void
 usage_help(_Bool help)
 {
-       fprintf(stderr, "Usage: %s [-hsUvdt] [-c file] [-e name=value:name=value:...]\n"
+       fprintf(stderr, "Usage: %s [-hsUvdtT] [-c file] [-e name=value:name=value:...]\n"
            "\t[-i file] [-I interface[:address1[:address2][/netmask]]] [-l cpulock]\n"
            "\t[-m size] [-n numcpus[:lbits[:cbits]]]\n"
            "\t[-p file] [-r file]\n", save_av[0]);
@@ -1604,6 +1608,7 @@ usage_help(_Bool help)
                    "\t-R\tSpecify a COW disk image file, iterates vkd0..n\n"
                    "\t-s\tBoot into single-user mode.\n"
                    "\t-t\tUse a precise host timer when calculating clock values.\n"
+                   "\t-T\tDisallow use of the TSC cpu timer as a clock.\n"
                    "\t-U\tEnable writing to kernel memory and module loading.\n"
                    "\t-v\tTurn on verbose booting.\n");
 
index b168bc4..90707ef 100644 (file)
@@ -174,7 +174,7 @@ static void
 vktimer_intr_initclock(struct cputimer_intr *cti __unused,
                       boolean_t selected __unused)
 {
-       vktimer_target = vkernel_timer_get_timecount();
+       vktimer_target = sys_cputimer->count();
 
        vktimer_ts.tv_nsec = 1000000000 / 20;
        vktimer_cotd = cothread_create(vktimer_thread, NULL, NULL, "vktimer");
@@ -237,6 +237,7 @@ vktimer_thread(cothread_t cotd)
                sysclock_t reload;
                ssysclock_t delta;
                int n;
+               uint32_t freq;
 
                /*
                 * Sleep
@@ -244,8 +245,9 @@ vktimer_thread(cothread_t cotd)
                cothread_sleep(cotd, &vktimer_ts);
 
 rescan:
-               curtime = vkernel_timer_get_timecount();
-               reload = 999999;
+               freq = sys_cputimer->freq;
+               curtime = sys_cputimer->count();
+               reload = freq - 1;
 
                /*
                 * Reset the target
@@ -271,14 +273,15 @@ rescan:
                        if (delta > 0 && reload > delta)
                                goto rescan;
                }
-               if (!use_precise_timer && reload < ticklength_us / 10) {
+               if (sys_cputimer == &vkernel_cputimer &&
+                    !use_precise_timer && reload < ticklength_us / 10) {
                        /*
                         * Avoid pointless short sleeps, when we only measure
                         * the current time at tick precision.
                         */
                        reload = ticklength_us / 10;
                }
-               vktimer_ts.tv_nsec = reload * 1000;
+               vktimer_ts.tv_nsec = ((uint64_t)reload * 1000000000) / freq;
        }
 }
 
@@ -290,9 +293,9 @@ rescan:
 static void
 vktimer_intr_reload(struct cputimer_intr *cti __unused, sysclock_t reload)
 {
-       if (reload >= 1000000)          /* uS */
-               reload = 1000000;
-       reload += vkernel_timer_get_timecount();
+       if (reload >= sys_cputimer->freq)
+               reload = sys_cputimer->freq;
+       reload += sys_cputimer->count();
        vktimer_reload[mycpu->gd_cpuid] = reload;
        if (vktimer_cotd && (ssysclock_t)(reload - vktimer_target) < 0) {
                while ((sysclock_t)(reload - vktimer_target) < 0)