vkernel - Restabilize pmap code, redo kqueue, systimer, and console code
authorMatthew Dillon <dillon@apollo.backplane.com>
Sun, 22 Jan 2017 07:43:07 +0000 (23:43 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sun, 22 Jan 2017 07:43:07 +0000 (23:43 -0800)
* Remove vm_token and add necessary vm_page spin locks to the vkernel's
  pmap code, improving its stability.

* Separate the systimer interrupt and console tty support from the
  kqueue subsystem.

  Uses SIGURG for systimer
  Uses SIGIO for kqueue
  Uses SIGALRM for cothread signalling

* The vkernel systimer code now uses a dedicated cothread for timing.  The
  cothread is a bit of a hack at the moment but is a more direct way of
  handling systimers.

* Attempt to fix user%/sys%/intr%/idle% in the systat -vm and
  systat -pv output.  Still isn't perfect, but it is now more
  accurate.

18 files changed:
sys/cpu/x86_64/include/cpu.h
sys/kern/kern_clock.c
sys/kern/kern_intr.c
sys/platform/pc64/x86_64/mp_machdep.c
sys/platform/vkernel64/include/cothread.h
sys/platform/vkernel64/include/md_var.h
sys/platform/vkernel64/include/smp.h
sys/platform/vkernel64/platform/console.c
sys/platform/vkernel64/platform/cothread.c
sys/platform/vkernel64/platform/init.c
sys/platform/vkernel64/platform/kqueue.c
sys/platform/vkernel64/platform/machintr.c
sys/platform/vkernel64/platform/pmap.c
sys/platform/vkernel64/platform/systimer.c
sys/platform/vkernel64/x86_64/exception.c
sys/platform/vkernel64/x86_64/mp.c
sys/platform/vkernel64/x86_64/trap.c
sys/sys/globaldata.h

index ba65e70..f97c5bd 100644 (file)
     atomic_set_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC)
 #define        need_ipiq()             \
     atomic_set_int(&mycpu->gd_reqflags, RQF_IPIQ)
+#define        need_timer()            \
+    atomic_set_int(&mycpu->gd_reqflags, RQF_TIMER)
+#ifdef _KERNEL_VIRTUAL
+#define        need_kqueue()           \
+    atomic_set_int(&mycpu->gd_reqflags, RQF_KQUEUE)
+#endif
 #define        signotify()             \
     atomic_set_int(&mycpu->gd_reqflags, RQF_AST_SIGNAL)
 #define        clear_user_resched()    \
index 6768275..a94d4a7 100644 (file)
@@ -344,11 +344,15 @@ initclocks_other(void *dummy)
                 * (8254 gets reset).  The sysclock will never jump backwards.
                 * Our time sync is based on the actual sysclock, not the
                 * ticks count.
+                *
+                * Install statclock before hardclock to prevent statclock
+                * from misinterpreting gd_flags for tick assignment when
+                * they overlap.
                 */
-               systimer_init_periodic_nq(&gd->gd_hardclock, hardclock,
-                                         NULL, hz);
                systimer_init_periodic_nq(&gd->gd_statclock, statclock,
                                          NULL, stathz);
+               systimer_init_periodic_nq(&gd->gd_hardclock, hardclock,
+                                         NULL, hz);
                /* XXX correct the frequency for scheduler / estcpu tests */
                systimer_init_periodic_nq(&gd->gd_schedclock, schedclock,
                                          NULL, ESTCPUFREQ);
@@ -709,6 +713,7 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
        struct gmonparam *g;
        int i;
 #endif
+       globaldata_t gd = mycpu;
        thread_t td;
        struct proc *p;
        int bump;
@@ -725,7 +730,7 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
         *       MPSAFE at early boot.
         */
        cv = sys_cputimer->count();
-       scv = mycpu->statint.gd_statcv;
+       scv = gd->statint.gd_statcv;
        if (scv == 0) {
                bump = 1;
        } else {
@@ -735,10 +740,10 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
                if (bump > 1000000)
                        bump = 1000000;
        }
-       mycpu->statint.gd_statcv = cv;
+       gd->statint.gd_statcv = cv;
 
 #if 0
-       stv = &mycpu->gd_stattv;
+       stv = &gd->gd_stattv;
        if (stv->tv_sec == 0) {
            bump = 1;
        } else {
@@ -772,7 +777,7 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
                else
                        cpu_time.cp_user += bump;
        } else {
-               int intr_nest = mycpu->gd_intr_nesting_level;
+               int intr_nest = gd->gd_intr_nesting_level;
 
                if (in_ipi) {
                        /*
@@ -813,30 +818,47 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
                 * XXX assume system if frame is NULL.  A NULL frame 
                 * can occur if ipi processing is done from a crit_exit().
                 */
-               if (IS_INTR_RUNNING)
-                       td->td_iticks += bump;
-               else
-                       td->td_sticks += bump;
-
                if (IS_INTR_RUNNING) {
                        /*
                         * If we interrupted an interrupt thread, well,
                         * count it as interrupt time.
                         */
+                       td->td_iticks += bump;
 #ifdef DEBUG_PCTRACK
                        if (frame)
                                do_pctrack(frame, PCTRACK_INT);
 #endif
                        cpu_time.cp_intr += bump;
+#ifdef _KERNEL_VIRTUAL
+               } else if (gd->gd_flags & GDF_VIRTUSER) {
+                       /*
+                        * The vkernel doesn't do a good job providing trap
+                        * frames that we can test.  If the GDF_VIRTUSER
+                        * flag is set we probably interrupted user mode.
+                        */
+                       td->td_uticks += bump;
+
+                       /*
+                        * Charge the time as appropriate
+                        */
+                       if (p && p->p_nice > NZERO)
+                               cpu_time.cp_nice += bump;
+                       else
+                               cpu_time.cp_user += bump;
+#endif
                } else {
-                       if (td == &mycpu->gd_idlethread) {
+#if 0
+                       kprintf("THREAD %s %p %p %08x\n", td->td_comm, td, &gd->gd_idlethread, gd->gd_reqflags);
+#endif
+                       td->td_sticks += bump;
+                       if (td == &gd->gd_idlethread) {
                                /*
                                 * Even if the current thread is the idle
                                 * thread it could be due to token contention
                                 * in the LWKT scheduler.  Count such as
                                 * system time.
                                 */
-                               if (mycpu->gd_reqflags & RQF_IDLECHECK_WK_MASK)
+                               if (gd->gd_reqflags & RQF_IDLECHECK_WK_MASK)
                                        cpu_time.cp_sys += bump;
                                else
                                        cpu_time.cp_idle += bump;
index 9ec2b7f..f012ac4 100644 (file)
@@ -887,6 +887,9 @@ ithread_handler(void *arg)
            for (rec = *list; rec; rec = nrec) {
                /* rec may be invalid after call */
                nrec = rec->next;
+               if (rec->handler == NULL) {
+                   kprintf("NULL HANDLER %s\n", rec->name);
+               } else
                if (rec->serializer) {
                    lwkt_serialize_handler_call(rec->serializer, rec->handler,
                                                rec->argument, NULL);
index b555e41..d042241 100644 (file)
@@ -1658,12 +1658,10 @@ static void
 detect_amd_topology(int count_htt_cores)
 {
        int shift = 0;
-       if ((cpu_feature & CPUID_HTT)
-                       && (amd_feature2 & AMDID2_CMP)) {
-               
+       if ((cpu_feature & CPUID_HTT) && (amd_feature2 & AMDID2_CMP)) {
                if (cpu_procinfo2 & AMDID_COREID_SIZE) {
-                       core_bits = (cpu_procinfo2 & AMDID_COREID_SIZE)
-                           >> AMDID_COREID_SIZE_SHIFT;
+                       core_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
+                                   AMDID_COREID_SIZE_SHIFT;
                } else {
                        core_bits = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
                        for (shift = 0; (1 << shift) < core_bits; ++shift)
@@ -1690,6 +1688,7 @@ amd_get_compute_unit_id(void *arg)
 
        do_cpuid(0x8000001e, regs);
        cpu_node_t * mynode = get_cpu_node_by_cpuid(mycpuid);
+
        /* 
         * AMD - CPUID Specification September 2010
         * page 34 - //ComputeUnitID = ebx[0:7]//
@@ -1716,11 +1715,11 @@ fix_amd_topology(void)
                kprintf("%d-%d; \n",
                        i, get_cpu_node_by_cpuid(i)->compute_unit_id);
        }
-
        return 0;
 }
 
-/* Calculate
+/*
+ * Calculate
  * - logical_CPU_bits
  * - core_bits
  * With the values above (for AMD or INTEL) we are able to generally
@@ -1750,12 +1749,15 @@ detect_cpu_topology(void)
        topology_detected = 1;
 
 OUT:
-       if (bootverbose)
-               kprintf("Bits within APICID: logical_CPU_bits: %d; core_bits: %d\n",
-                   logical_CPU_bits, core_bits);
+       if (bootverbose) {
+               kprintf("Bits within APICID: logical_CPU_bits: %d; "
+                       "core_bits: %d\n",
+                       logical_CPU_bits, core_bits);
+       }
 }
 
-/* Interface functions to calculate chip_ID,
+/*
+ * Interface functions to calculate chip_ID,
  * core_number and logical_number
  * Ref: http://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
  */
@@ -1775,13 +1777,13 @@ get_chip_ID_from_APICID(int apicid)
 int
 get_core_number_within_chip(int cpuid)
 {
-       return (get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) &
-           ( (1 << core_bits) -1);
+       return ((get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) &
+               ((1 << core_bits) - 1));
 }
 
 int
 get_logical_CPU_number_within_core(int cpuid)
 {
-       return get_apicid_from_cpuid(cpuid) &
-           ( (1 << logical_CPU_bits) -1);
+       return (get_apicid_from_cpuid(cpuid) &
+               ((1 << logical_CPU_bits) - 1));
 }
index 7fd72a2..203774c 100644 (file)
@@ -62,4 +62,7 @@ void cothread_wait(cothread_t cotd);
 void cothread_lock(cothread_t cotd, int is_cotd);
 void cothread_unlock(cothread_t cotd, int is_cotd);
 
+void cothread_sleep(cothread_t cotd, struct timespec *ts);
+void cothread_wakeup(cothread_t cotd, struct timespec *ts);
+
 #endif
index b47f972..4725922 100644 (file)
@@ -77,6 +77,7 @@ extern        vpte_t  *KernelPTA;     /* NOTE: Offset for direct VA translation */
 extern vpte_t  *KernelPTD;
 extern vm_offset_t crashdumpmap;
 extern  int    cpu_fxsr;
+extern  pthread_t ap_tids[MAXCPU];
 
 extern  char    cpu_vendor[];  /* XXX belongs in i386 */
 extern  u_int   cpu_vendor_id; /* XXX belongs in i386 */
@@ -119,14 +120,13 @@ void vcons_set_mode(int);
 int npxdna(struct trapframe *);
 void npxpush(struct __mcontext *mctx);
 void npxpop(struct __mcontext *mctx);
+void kqueue_intr(struct intrframe *);
+void vktimer_intr(struct intrframe *);
 
 void signalintr(int intr);
 
 struct kqueue_info;
 struct kqueue_info *kqueue_add(int, void (*)(void *, struct intrframe *), void *);
 void kqueue_del(struct kqueue_info *);
-struct kqueue_info *kqueue_add_timer(void (*func)(void *, struct intrframe *), void *data);
-void kqueue_reload_timer(struct kqueue_info *info, int ms);
-
 
 #endif
index 93cd3e3..5fa2a34 100644 (file)
@@ -43,7 +43,7 @@ int get_core_number_within_chip(int cpuid);
 int get_logical_CPU_number_within_core(int cpuid);
 
 /* Assume that APICID = CPUID for virtual processors */
-#define get_cpuid_from_apicid(cpuid) cpuid
+#define get_cpuid_from_apicid(apicid) apicid
 #define get_apicid_from_cpuid(cpuid) cpuid
 
 #endif /* !LOCORE */
index c14dbba..1c1d427 100644 (file)
@@ -52,6 +52,7 @@
 
 static int console_stolen_by_kernel;
 static struct kqueue_info *kqueue_console_info;
+static struct tty *kqueue_console_tty;
 
 /************************************************************************
  *                         CONSOLE DEVICE                              *
@@ -61,7 +62,7 @@ static struct kqueue_info *kqueue_console_info;
 
 static int vcons_tty_param(struct tty *tp, struct termios *tio);
 static void vcons_tty_start(struct tty *tp);
-static void vcons_intr(void *tpx, struct intrframe *frame __unused);
+static void vcons_hardintr(void *tpx, struct intrframe *frame __unused);
 
 static d_open_t         vcons_open;
 static d_close_t        vcons_close;
@@ -109,8 +110,10 @@ vcons_open(struct dev_open_args *ap)
                error = (*linesw[tp->t_line].l_open)(dev, tp);
                ioctl(0, TIOCGWINSZ, &tp->t_winsize);
 
-               if (kqueue_console_info == NULL)
-                       kqueue_console_info = kqueue_add(0, vcons_intr, tp);
+               if (kqueue_console_info == NULL) {
+                       kqueue_console_tty = tp;
+                       kqueue_console_info = kqueue_add(0, vcons_hardintr, tp);
+               }
        } else {
                /* dummy up other minors so the installer will run */
                error = 0;
@@ -196,34 +199,10 @@ vcons_tty_start(struct tty *tp)
 
 static
 void
-vcons_intr(void *tpx, struct intrframe *frame __unused)
+vcons_hardintr(void *tpx, struct intrframe *frame __unused)
 {
-       struct tty *tp = tpx;
-       unsigned char buf[32];
-       int i;
-       int n;
-
-       lwkt_gettoken(&tty_token);
-       /*
-        * If we aren't open we only have synchronous traffic via the
-        * debugger and do not need to poll.
-        */
-       if ((tp->t_state & TS_ISOPEN) == 0) {
-               lwkt_reltoken(&tty_token);
-               return;
-       }
-
-       /*
-        * Only poll if we are open and haven't been stolen by the debugger.
-        */
-       if (console_stolen_by_kernel == 0 && (tp->t_state & TS_ISOPEN)) {
-               do {
-                       n = extpread(0, buf, sizeof(buf), O_FNONBLOCKING, -1LL);
-                       for (i = 0; i < n; ++i)
-                               (*linesw[tp->t_line].l_rint)(buf[i], tp);
-               } while (n > 0);
-       }
-       lwkt_reltoken(&tty_token);
+       if (console_stolen_by_kernel == 0)
+               signalintr(4);
 }
 
 /************************************************************************
@@ -306,6 +285,47 @@ vconswinch_intr(void *arg __unused, void *frame __unused)
        }
 }
 
+/*
+ * This has to be an interrupt thread and not a hard interrupt.
+ */
+static
+void
+vconsvirt_intr(void *arg __unused, void *frame __unused)
+{
+       struct tty *tp;
+       unsigned char buf[32];
+       int i;
+       int n;
+
+       if (kqueue_console_info == NULL)
+               return;
+       tp = kqueue_console_tty;
+
+       lwkt_gettoken(&tty_token);
+       /*
+        * If we aren't open we only have synchronous traffic via the
+        * debugger and do not need to poll.
+        */
+       if ((tp->t_state & TS_ISOPEN) == 0) {
+               lwkt_reltoken(&tty_token);
+               return;
+       }
+
+       /*
+        * Only poll if we are open and haven't been stolen by the debugger.
+        */
+       if (console_stolen_by_kernel == 0 && (tp->t_state & TS_ISOPEN)) {
+               do {
+                       n = extpread(0, buf, sizeof(buf), O_FNONBLOCKING, -1LL);
+                       for (i = 0; i < n; ++i)
+                               (*linesw[tp->t_line].l_rint)(buf[i], tp);
+               } while (n > 0);
+       }
+       lwkt_reltoken(&tty_token);
+}
+
+
+
 static void
 vconscleanup(void)
 {
@@ -346,7 +366,9 @@ vconsinit_fini(struct consdev *cp)
         * to use the interrupt subsystem.
         */
        register_int_virtual(3, vconswinch_intr, NULL, "swinch", NULL,
-           INTR_MPSAFE);
+                            INTR_MPSAFE);
+       register_int_virtual(4, vconsvirt_intr, NULL, "vintr", NULL,
+                            INTR_MPSAFE);
        bzero(&sa, sizeof(sa));
        sigemptyset(&sa.sa_mask);
        sa.sa_handler = vconswinchsig;
index 0aa2ed7..d4894fe 100644 (file)
@@ -47,6 +47,7 @@
 #include <sys/tls.h>
 #include <sys/types.h>
 #include <sys/bus.h>
+#include <time.h>
 
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
@@ -95,8 +96,11 @@ cothread_create(void (*thr_func)(cothread_t cotd),
 
        cotd->pintr = pthread_self();
 
-       cotd->intr_id = register_int_virtual(1, (void *)thr_intr, cotd, name,
-           NULL, INTR_MPSAFE);
+       if (thr_intr) {
+               cotd->intr_id = register_int_virtual(1, (void *)thr_intr,
+                                                    cotd, name,
+                                                    NULL, INTR_MPSAFE);
+       }
 
        /*
         * The vkernel's cpu_disable_intr() masks signals.  We don't want
@@ -105,8 +109,8 @@ cothread_create(void (*thr_func)(cothread_t cotd),
        pthread_attr_init(&attr);
        if (vmm_enabled) {
                stack = mmap(NULL, KERNEL_STACK_SIZE,
-                   PROT_READ|PROT_WRITE|PROT_EXEC,
-                   MAP_ANON, -1, 0);
+                            PROT_READ|PROT_WRITE|PROT_EXEC,
+                            MAP_ANON, -1, 0);
                if (stack == MAP_FAILED) {
                        panic("Unable to allocate stack for cothread\n");
                }
@@ -132,7 +136,8 @@ cothread_delete(cothread_t *cotdp)
        cothread_t cotd;
 
        if ((cotd = *cotdp) != NULL) {
-               unregister_int_virtual(cotd->intr_id);
+               if (cotd->thr_intr)
+                       unregister_int_virtual(cotd->intr_id);
                crit_enter();
                pthread_join(cotd->pthr, NULL);
                crit_exit();
@@ -162,7 +167,7 @@ cothread_thread(void *arg)
 void
 cothread_intr(cothread_t cotd)
 {
-       pthread_kill(cotd->pintr, SIGIO);
+       pthread_kill(cotd->pintr, SIGALRM);
 }
 
 /*
@@ -186,6 +191,23 @@ cothread_wait(cothread_t cotd)
 }
 
 /*
+ * Used for systimer support
+ */
+void
+cothread_sleep(cothread_t cotd, struct timespec *ts)
+{
+       nanosleep(ts, NULL);
+}
+
+void
+cothread_wakeup(cothread_t cotd, struct timespec *ts)
+{
+       ts->tv_sec = 0;
+       ts->tv_nsec = 0;
+       pthread_kill(cotd->pthr, SIGINT);
+}
+
+/*
  * Typically called by kernel thread or cothread
  *
  * These must be a matched pair.  We will acquire a critical
index 37cbc58..3ed0cde 100644 (file)
@@ -88,7 +88,6 @@ vm_phystable_t phys_avail[16];
 vm_paddr_t Maxmem;
 vm_paddr_t Maxmem_bytes;
 long physmem;
-int naps = 0; /* # of Applications processors */
 int MemImageFd = -1;
 struct vkdisk_info DiskInfo[VKDISK_MAX];
 int DiskNum;
@@ -118,6 +117,7 @@ int tsc_invariant;
 int tsc_mpsync;
 int64_t tsc_frequency;
 int optcpus;           /* number of cpus - see mp_start() */
+int cpu_bits;
 int lwp_cpu_lock;      /* if/how to lock virtual CPUs to real CPUs */
 int real_ncpus;                /* number of real CPUs */
 int next_cpu;          /* next real CPU to lock a virtual CPU to */
@@ -236,11 +236,13 @@ main(int ac, char **av)
        eflag = 0;
        pos = 0;
        kenv_size = 0;
+
        /*
         * Process options
         */
        kernel_mem_readonly = 1;
        optcpus = 2;
+       cpu_bits = 1;
        vkernel_b_arg = 0;
        vkernel_B_arg = 0;
        lwp_cpu_lock = LCL_NONE;
@@ -392,18 +394,31 @@ main(int ac, char **av)
                        optcpus = strtol(tok, NULL, 0);
                        if (optcpus < 1 || optcpus > MAXCPU)
                                usage_err("Bad ncpus, valid range is 1-%d", MAXCPU);
+                       cpu_bits = 1;
+                       while ((1 << cpu_bits) < optcpus)
+                               ++cpu_bits;
+
+                       /*
+                        * By default assume simple hyper-threading
+                        */
+                       vkernel_b_arg = 1;
+                       vkernel_B_arg = cpu_bits - vkernel_b_arg;
 
-                       /* :lbits argument */
+                       /*
+                        * [:lbits[:cbits]] override # of cpu bits
+                        * for logical and core extraction, supplying
+                        * defaults for any omission.
+                        */
                        tok = strtok(NULL, ":");
                        if (tok != NULL) {
                                vkernel_b_arg = strtol(tok, NULL, 0);
+                               vkernel_B_arg = cpu_bits - vkernel_b_arg;
 
                                /* :cbits argument */
                                tok = strtok(NULL, ":");
                                if (tok != NULL) {
                                        vkernel_B_arg = strtol(tok, NULL, 0);
                                }
-
                        }
                        break;
                case 'p':
index 89ece1e..8768461 100644 (file)
@@ -59,11 +59,7 @@ struct kqueue_info {
        int fd;
 };
 
-static void kqueuesig(int signo);
-static void kqueue_intr(void *arg __unused, void *frame __unused);
-
 static int KQueueFd = -1;
-static void *VIntr1;
 
 /*
  * Initialize kqueue based I/O
@@ -76,14 +72,15 @@ static void *VIntr1;
 void
 init_kqueue(void)
 {
+#if 0
        struct sigaction sa;
-
        bzero(&sa, sizeof(sa));
        /*sa.sa_mailbox = &mdcpu->gd_mailbox;*/
        sa.sa_flags = 0;
        sa.sa_handler = kqueuesig;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGIO, &sa, NULL);
+#endif
        KQueueFd = kqueue();
        if (fcntl(KQueueFd, F_SETOWN, getpid()) < 0)
                panic("Cannot configure kqueue for SIGIO, update your kernel");
@@ -91,6 +88,7 @@ init_kqueue(void)
                panic("Cannot configure kqueue for SIGIO, update your kernel");
 }
 
+#if 0
 /*
  * Signal handler dispatches interrupt thread.  Use interrupt #1
  */
@@ -100,6 +98,8 @@ kqueuesig(int signo)
        signalintr(1);
 }
 
+#endif
+
 /*
  * Generic I/O event support
  */
@@ -110,11 +110,6 @@ kqueue_add(int fd, void (*func)(void *, struct intrframe *), void *data)
        struct kqueue_info *info;
        struct kevent kev;
 
-       if (VIntr1 == NULL) {
-               VIntr1 = register_int_virtual(1, kqueue_intr, NULL, "kqueue",
-                   NULL, INTR_MPSAFE);
-       }
-
        info = kmalloc(sizeof(*info), M_DEVBUF, M_ZERO|M_INTWAIT);
        info->func = func;
        info->data = data;
@@ -125,6 +120,7 @@ kqueue_add(int fd, void (*func)(void *, struct intrframe *), void *data)
        return(info);
 }
 
+#if 0
 /*
  * Medium resolution timer support
  */
@@ -133,15 +129,11 @@ kqueue_add_timer(void (*func)(void *, struct intrframe *), void *data)
 {
        struct kqueue_info *info;
 
-       if (VIntr1 == NULL) {
-               VIntr1 = register_int_virtual(1, kqueue_intr, NULL, "kqueue",
-                   NULL, INTR_MPSAFE);
-       }
-
        info = kmalloc(sizeof(*info), M_DEVBUF, M_ZERO|M_INTWAIT);
        info->func = func;
        info->data = data;
        info->fd = (uintptr_t)info;
+
        return(info);
 }
 
@@ -158,6 +150,7 @@ kqueue_reload_timer(struct kqueue_info *info, int ms)
        if (kevent(KQueueFd, &kev, 1, NULL, 0, &ts) < 0)
                panic("kqueue_reload_timer: Failed");
 }
+#endif
 
 /*
  * Destroy a previously added kqueue event
@@ -182,9 +175,8 @@ kqueue_del(struct kqueue_info *info)
  * Called with the MP lock held.  Note that this is still an interrupt
  * thread context.
  */
-static
 void
-kqueue_intr(void *arg __unused, void *frame __unused)
+kqueue_intr(struct intrframe *frame)
 {
        struct timespec ts;
        struct kevent kevary[8];
index 4af763d..8dc7021 100644 (file)
@@ -125,7 +125,8 @@ splz(void)
        thread_t td = gd->mi.gd_curthread;
        int irq;
 
-       while (gd->mi.gd_reqflags & (RQF_IPIQ|RQF_INTPEND)) {
+       while (gd->mi.gd_reqflags & (RQF_IPIQ|RQF_INTPEND|
+                                    RQF_TIMER|RQF_KQUEUE)) {
                crit_enter_quick(td);
                if (gd->mi.gd_reqflags & RQF_IPIQ) {
                        atomic_clear_int(&gd->mi.gd_reqflags, RQF_IPIQ);
@@ -146,6 +147,14 @@ splz(void)
                                sched_ithd_hard_virtual(irq);
                        }
                }
+               if (gd->mi.gd_reqflags & RQF_TIMER) {
+                       atomic_clear_int(&gd->mi.gd_reqflags, RQF_TIMER);
+                       vktimer_intr(NULL);
+               }
+               if (gd->mi.gd_reqflags & RQF_KQUEUE) {
+                       atomic_clear_int(&gd->mi.gd_reqflags, RQF_KQUEUE);
+                       kqueue_intr(NULL);
+               }
                crit_exit_noyield(td);
        }
 }
@@ -180,7 +189,8 @@ signalintr(int intr)
 void
 cpu_disable_intr(void)
 {
-       sigblock(sigmask(SIGALRM)|sigmask(SIGIO)|sigmask(SIGUSR1));
+       sigblock(sigmask(SIGALRM)|sigmask(SIGIO)|sigmask(SIGUSR1)|
+                sigmask(SIGURG));
 }
 
 void
@@ -194,7 +204,7 @@ cpu_mask_all_signals(void)
 {
        sigblock(sigmask(SIGALRM)|sigmask(SIGIO)|sigmask(SIGQUIT)|
                 sigmask(SIGUSR1)|sigmask(SIGTERM)|sigmask(SIGWINCH)|
-                sigmask(SIGUSR2));
+                sigmask(SIGUSR2)|sigmask(SIGURG));
 }
 
 void
index 474a729..e3c1142 100644 (file)
@@ -713,7 +713,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
        pt_entry_t *pte;
        pd_entry_t pde, *pdep;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
        rtval = 0;
        pdep = pmap_pde(pmap, va);
        if (pdep != NULL) {
@@ -728,7 +728,8 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
                        }
                }
        }
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
+
        return rtval;
 }
 
@@ -1099,7 +1100,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
                }
 
                KKASSERT(pmap->pm_stats.resident_count > 0);
-               --pmap->pm_stats.resident_count;
+               atomic_add_long(&pmap->pm_stats.resident_count, -1);
 
                if (pmap->pm_ptphint == m)
                        pmap->pm_ptphint = NULL;
@@ -1360,7 +1361,7 @@ pmap_release_free_page(struct pmap *pmap, vm_page_t p)
                /* JG What about wire_count? */
        }
        KKASSERT(pmap->pm_stats.resident_count > 0);
-       --pmap->pm_stats.resident_count;
+       atomic_add_long(&pmap->pm_stats.resident_count, -1);
 
        if (p->hold_count)  {
                panic("pmap_release: freeing held pt page "
@@ -1419,7 +1420,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
         * Map the pagetable page into the process address space, if
         * it isn't already there.
         */
-       ++pmap->pm_stats.resident_count;
+       atomic_add_long(&pmap->pm_stats.resident_count, 1);
 
        if (ptepindex >= (NUPDE + NUPDPE)) {
                pml4_entry_t *pml4;
@@ -1603,7 +1604,6 @@ pmap_release(struct pmap *pmap)
 
        KKASSERT(pmap != &kernel_pmap);
 
-       lwkt_gettoken(&vm_token);
 #if defined(DIAGNOSTIC)
        if (object->ref_count != 1)
                panic("pmap_release: pteobj reference count != 1");
@@ -1631,7 +1631,6 @@ pmap_release(struct pmap *pmap)
                }
        } while (info.error);
        vm_object_drop(object);
-       lwkt_reltoken(&vm_token);
 }
 
 static int
@@ -1754,11 +1753,8 @@ pmap_growkernel(vm_offset_t kstart, vm_offset_t kend)
 void
 pmap_reference(pmap_t pmap)
 {
-       if (pmap) {
-               lwkt_gettoken(&vm_token);
-               ++pmap->pm_count;
-               lwkt_reltoken(&vm_token);
-       }
+       if (pmap)
+               atomic_add_int(&pmap->pm_count, 1);
 }
 
 /************************************************************************
@@ -1865,7 +1861,6 @@ pmap_collect(void)
 
        if (pmap_pagedaemon_waken == 0)
                return;
-       lwkt_gettoken(&vm_token);
        pmap_pagedaemon_waken = 0;
 
        if (warningdone < 5) {
@@ -1885,7 +1880,6 @@ pmap_collect(void)
                        vm_page_wakeup(m);
                }
        }
-       lwkt_reltoken(&vm_token);
 }
 
 
@@ -1895,7 +1889,7 @@ pmap_collect(void)
  * to the header.  Otherwise we must search the list for
  * the entry.  In either case we free the now unused entry.
  *
- * caller must hold vm_token.
+ * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller.
  */
 static int
 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
@@ -1929,11 +1923,8 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
-               ++pmap->pm_generation;
-               KKASSERT(pmap->pm_pteobj != NULL);
-               vm_object_hold(pmap->pm_pteobj);
+               atomic_add_int(&pmap->pm_generation, 1);
                rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
-               vm_object_drop(pmap->pm_pteobj);
                free_pv_entry(pv);
        }
        return rtval;
@@ -1942,13 +1933,14 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
 /*
  * Create a pv entry for page at pa for (pmap, va).  If the page table page
  * holding the VA is managed, mpte will be non-NULL.
+ *
+ * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller.
  */
 static void
 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
 {
        pv_entry_t pv;
 
-       crit_enter();
        pv = get_pv_entry();
        pv->pv_va = va;
        pv->pv_pmap = pmap;
@@ -1958,22 +1950,23 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count++;
        atomic_add_int(&m->object->agg_pv_list_count, 1);
-
-       crit_exit();
 }
 
 /*
  * pmap_remove_pte: do the things to unmap a page in a process
+ *
+ * Caller holds pmap->pm_pteobj
  */
 static int
 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
 {
        pt_entry_t oldpte;
        vm_page_t m;
+       int error;
 
        oldpte = pmap_inval_loadandclear(ptq, pmap, va);
        if (oldpte & VPTE_WIRED)
-               --pmap->pm_stats.wired_count;
+               atomic_add_long(&pmap->pm_stats.wired_count, -1);
        KKASSERT(pmap->pm_stats.wired_count >= 0);
 
 #if 0
@@ -1986,9 +1979,10 @@ pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
                cpu_invlpg((void *)va);
 #endif
        KKASSERT(pmap->pm_stats.resident_count > 0);
-       --pmap->pm_stats.resident_count;
+       atomic_add_long(&pmap->pm_stats.resident_count, -1);
        if (oldpte & VPTE_MANAGED) {
                m = PHYS_TO_VM_PAGE(oldpte);
+               vm_page_spin_lock(m);
                if (oldpte & VPTE_M) {
 #if defined(PMAP_DIAGNOSTIC)
                        if (pmap_nw_modified(oldpte)) {
@@ -2002,21 +1996,23 @@ pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
                }
                if (oldpte & VPTE_A)
                        vm_page_flag_set(m, PG_REFERENCED);
-               return pmap_remove_entry(pmap, m, va);
+               error = pmap_remove_entry(pmap, m, va);
+               vm_page_spin_unlock(m);
        } else {
-               return pmap_unuse_pt(pmap, va, NULL);
+               error = pmap_unuse_pt(pmap, va, NULL);
        }
-
-       return 0;
+       return error;
 }
 
 /*
  * pmap_remove_page:
  *
- *     Remove a single page from a process address space.
+ * Remove a single page from a process address space.
+ *
+ * This function may not be called from an interrupt if the pmap is
+ * not kernel_pmap.
  *
- *     This function may not be called from an interrupt if the pmap is
- *     not kernel_pmap.
+ * Caller holds pmap->pm_pteobj
  */
 static void
 pmap_remove_page(struct pmap *pmap, vm_offset_t va)
@@ -2055,10 +2051,8 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
                return;
 
        vm_object_hold(pmap->pm_pteobj);
-       lwkt_gettoken(&vm_token);
        KKASSERT(pmap->pm_stats.resident_count >= 0);
        if (pmap->pm_stats.resident_count == 0) {
-               lwkt_reltoken(&vm_token);
                vm_object_drop(pmap->pm_pteobj);
                return;
        }
@@ -2072,7 +2066,6 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
                pde = pmap_pde(pmap, sva);
                if (pde && (*pde & VPTE_PS) == 0) {
                        pmap_remove_page(pmap, sva);
-                       lwkt_reltoken(&vm_token);
                        vm_object_drop(pmap->pm_pteobj);
                        return;
                }
@@ -2118,7 +2111,8 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
                        /* JG FreeBSD has more complex treatment here */
                        KKASSERT(*pde != 0);
                        pmap_inval_pde(pde, pmap, sva);
-                       pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+                       atomic_add_long(&pmap->pm_stats.resident_count,
+                                      -NBPDR / PAGE_SIZE);
                        continue;
                }
 
@@ -2141,7 +2135,6 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
                                break;
                }
        }
-       lwkt_reltoken(&vm_token);
        vm_object_drop(pmap->pm_pteobj);
 }
 
@@ -2158,6 +2151,8 @@ pmap_remove_all(vm_page_t m)
 {
        pt_entry_t *pte, tpte;
        pv_entry_t pv;
+       vm_object_t pmobj;
+       pmap_t pmap;
 
 #if defined(PMAP_DIAGNOSTIC)
        /*
@@ -2169,18 +2164,39 @@ pmap_remove_all(vm_page_t m)
        }
 #endif
 
-       lwkt_gettoken(&vm_token);
+restart:
+       vm_page_spin_lock(m);
        while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-               KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
-               --pv->pv_pmap->pm_stats.resident_count;
+               pmap = pv->pv_pmap;
+               pmobj = pmap->pm_pteobj;
 
-               pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+               /*
+                * Handle reversed lock ordering
+                */
+               if (vm_object_hold_try(pmobj) == 0) {
+                       refcount_acquire(&pmobj->hold_count);
+                       vm_page_spin_unlock(m);
+                       vm_object_lock(pmobj);
+                       vm_page_spin_lock(m);
+                       if (pv != TAILQ_FIRST(&m->md.pv_list) ||
+                           pmap != pv->pv_pmap ||
+                           pmobj != pmap->pm_pteobj) {
+                               vm_page_spin_unlock(m);
+                               vm_object_drop(pmobj);
+                               goto restart;
+                       }
+               }
+
+               KKASSERT(pmap->pm_stats.resident_count > 0);
+               atomic_add_long(&pmap->pm_stats.resident_count, -1);
+
+               pte = pmap_pte(pmap, pv->pv_va);
                KKASSERT(pte != NULL);
 
-               tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va);
+               tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
                if (tpte & VPTE_WIRED)
-                       pv->pv_pmap->pm_stats.wired_count--;
-               KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0);
+                       atomic_add_long(&pmap->pm_stats.wired_count, -1);
+               KKASSERT(pmap->pm_stats.wired_count >= 0);
 
                if (tpte & VPTE_A)
                        vm_page_flag_set(m, PG_REFERENCED);
@@ -2196,24 +2212,23 @@ pmap_remove_all(vm_page_t m)
                                    pv->pv_va, tpte);
                        }
 #endif
-                       if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
+                       if (pmap_track_modified(pmap, pv->pv_va))
                                vm_page_dirty(m);
                }
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-               TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
-               ++pv->pv_pmap->pm_generation;
+               TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+               atomic_add_int(&pmap->pm_generation, 1);
                m->md.pv_list_count--;
                atomic_add_int(&m->object->agg_pv_list_count, -1);
                KKASSERT(m->md.pv_list_count >= 0);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-               vm_object_hold(pv->pv_pmap->pm_pteobj);
-               pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
-               vm_object_drop(pv->pv_pmap->pm_pteobj);
+               pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
+               vm_object_drop(pmobj);
                free_pv_entry(pv);
        }
        KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0);
-       lwkt_reltoken(&vm_token);
+       vm_page_spin_unlock(m);
 }
 
 /*
@@ -2225,22 +2240,23 @@ pmap_remove_specific(pmap_t pmap, vm_page_t m)
        pt_entry_t *pte, tpte;
        pv_entry_t pv;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
 again:
+       vm_page_spin_lock(m);
        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
                if (pv->pv_pmap != pmap)
                        continue;
 
-               KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0);
-               --pv->pv_pmap->pm_stats.resident_count;
+               KKASSERT(pmap->pm_stats.resident_count > 0);
+               atomic_add_long(&pmap->pm_stats.resident_count, -1);
 
-               pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+               pte = pmap_pte(pmap, pv->pv_va);
                KKASSERT(pte != NULL);
 
-               tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va);
+               tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
                if (tpte & VPTE_WIRED)
-                       pv->pv_pmap->pm_stats.wired_count--;
-               KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0);
+                       atomic_add_long(&pmap->pm_stats.wired_count, -1);
+               KKASSERT(pmap->pm_stats.wired_count >= 0);
 
                if (tpte & VPTE_A)
                        vm_page_flag_set(m, PG_REFERENCED);
@@ -2249,24 +2265,24 @@ again:
                 * Update the vm_page_t clean and reference bits.
                 */
                if (tpte & VPTE_M) {
-                       if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
+                       if (pmap_track_modified(pmap, pv->pv_va))
                                vm_page_dirty(m);
                }
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-               TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
-               ++pv->pv_pmap->pm_generation;
+               TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+               atomic_add_int(&pmap->pm_generation, 1);
                m->md.pv_list_count--;
                atomic_add_int(&m->object->agg_pv_list_count, -1);
                KKASSERT(m->md.pv_list_count >= 0);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-               vm_object_hold(pv->pv_pmap->pm_pteobj);
-               pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
-               vm_object_drop(pv->pv_pmap->pm_pteobj);
+               pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
+               vm_page_spin_unlock(m);
                free_pv_entry(pv);
                goto again;
        }
-       lwkt_reltoken(&vm_token);
+       vm_page_spin_unlock(m);
+       vm_object_drop(pmap->pm_pteobj);
 }
 
 /*
@@ -2300,10 +2316,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
        if (prot & VM_PROT_WRITE)
                return;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
 
        for (; sva < eva; sva = va_next) {
-
                pml4e = pmap_pml4e(pmap, sva);
                if ((*pml4e & VPTE_V) == 0) {
                        va_next = (sva + NBPML4) & ~PML4MASK;
@@ -2333,7 +2348,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
                if ((ptpaddr & VPTE_PS) != 0) {
                        /* JG correct? */
                        pmap_clean_pde(pde, pmap, sva);
-                       pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+                       atomic_add_long(&pmap->pm_stats.resident_count,
+                                       -NBPDR / PAGE_SIZE);
                        continue;
                }
 
@@ -2379,7 +2395,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
                        }
                }
        }
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
 }
 
 /*
@@ -2414,7 +2430,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
        va = trunc_page(va);
 
        vm_object_hold(pmap->pm_pteobj);
-       lwkt_gettoken(&vm_token);
 
        /*
         * Get the page table page.   The kernel_pmap's page table pages
@@ -2458,9 +2473,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                 * the PT page will be also.
                 */
                if (wired && ((origpte & VPTE_WIRED) == 0))
-                       ++pmap->pm_stats.wired_count;
+                       atomic_add_long(&pmap->pm_stats.wired_count, 1);
                else if (!wired && (origpte & VPTE_WIRED))
-                       --pmap->pm_stats.wired_count;
+                       atomic_add_long(&pmap->pm_stats.wired_count, -1);
 
                /*
                 * Remove the extra pte reference.  Note that we cannot
@@ -2503,19 +2518,21 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         * raise IPL while manipulating pv_table since pmap_enter can be
         * called at interrupt time.
         */
+       vm_page_spin_lock(m);
        if (pmap_initialized &&
            (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
                pa |= VPTE_MANAGED;
                vm_page_flag_set(m, PG_MAPPED);
        }
+       vm_page_spin_unlock(m);
 
        /*
         * Increment counters
         */
-       ++pmap->pm_stats.resident_count;
+       atomic_add_long(&pmap->pm_stats.resident_count, 1);
        if (wired)
-               pmap->pm_stats.wired_count++;
+               atomic_add_long(&pmap->pm_stats.wired_count, 1);
 
 validate:
        /*
@@ -2543,7 +2560,6 @@ validate:
                        vm_page_flag_set(m, PG_WRITEABLE);
        }
        KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED));
-       lwkt_reltoken(&vm_token);
        vm_object_drop(pmap->pm_pteobj);
 }
 
@@ -2573,7 +2589,6 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
        ptepindex = pmap_pde_pindex(va);
 
        vm_object_hold(pmap->pm_pteobj);
-       lwkt_gettoken(&vm_token);
 
        do {
                /*
@@ -2614,7 +2629,6 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
                pmap_unwire_pte_hold(pmap, va, mpte);
                pa = VM_PAGE_TO_PHYS(m);
                KKASSERT(((*pte ^ pa) & VPTE_FRAME) == 0);
-               lwkt_reltoken(&vm_token);
                vm_object_drop(pmap->pm_pteobj);
                return;
        }
@@ -2622,15 +2636,17 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
        /*
         * Enter on the PV list if part of our managed memory
         */
+       vm_page_spin_lock(m);
        if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
                vm_page_flag_set(m, PG_MAPPED);
        }
+       vm_page_spin_unlock(m);
 
        /*
         * Increment counters
         */
-       ++pmap->pm_stats.resident_count;
+       atomic_add_long(&pmap->pm_stats.resident_count, 1);
 
        pa = VM_PAGE_TO_PHYS(m);
 
@@ -2643,7 +2659,6 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
                *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED;
        /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */
        /*pmap_inval_flush(&info); don't need for vkernel */
-       lwkt_reltoken(&vm_token);
        vm_object_drop(pmap->pm_pteobj);
 }
 
@@ -2782,7 +2797,7 @@ pmap_prefault_ok(pmap_t pmap, vm_offset_t addr)
        pd_entry_t *pde;
        int ret;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
        pde = pmap_pde(pmap, addr);
        if (pde == NULL || *pde == 0) {
                ret = 0;
@@ -2790,7 +2805,8 @@ pmap_prefault_ok(pmap_t pmap, vm_offset_t addr)
                pte = pmap_pde_to_pte(pde, addr);
                ret = (*pte) ? 0 : 1;
        }
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
+
        return (ret);
 }
 
@@ -2809,13 +2825,13 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired,
        if (pmap == NULL)
                return;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
        pte = pmap_pte(pmap, va);
 
        if (wired && !pmap_pte_w(pte))
-               pmap->pm_stats.wired_count++;
+               atomic_add_long(&pmap->pm_stats.wired_count, 1);
        else if (!wired && pmap_pte_w(pte))
-               pmap->pm_stats.wired_count--;
+               atomic_add_long(&pmap->pm_stats.wired_count, -1);
 
        /*
         * Wiring is not a hardware characteristic so there is no need to
@@ -2828,7 +2844,7 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired,
                atomic_set_long(pte, VPTE_WIRED);
        else
                atomic_clear_long(pte, VPTE_WIRED);
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
 }
 
 /*
@@ -2942,21 +2958,18 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
        if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
                return FALSE;
 
-       crit_enter();
-       lwkt_gettoken(&vm_token);
-
+       vm_page_spin_lock(m);
        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
                if (pv->pv_pmap == pmap) {
-                       lwkt_reltoken(&vm_token);
-                       crit_exit();
+                       vm_page_spin_unlock(m);
                        return TRUE;
                }
                loops++;
                if (loops >= 16)
                        break;
        }
-       lwkt_reltoken(&vm_token);
-       crit_exit();
+       vm_page_spin_unlock(m);
+
        return (FALSE);
 }
 
@@ -2979,7 +2992,6 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 
        if (pmap->pm_pteobj)
                vm_object_hold(pmap->pm_pteobj);
-       lwkt_gettoken(&vm_token);
 
        for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
                if (pv->pv_va >= eva || pv->pv_va < sva) {
@@ -3002,12 +3014,13 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va);
 
                m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME);
+               vm_page_spin_lock(m);
 
                KASSERT(m < &vm_page_array[vm_page_array_size],
                        ("pmap_remove_pages: bad tpte %lx", tpte));
 
                KKASSERT(pmap->pm_stats.resident_count > 0);
-               --pmap->pm_stats.resident_count;
+               atomic_add_long(&pmap->pm_stats.resident_count, -1);
 
                /*
                 * Update the vm_page_t clean and reference bits.
@@ -3018,13 +3031,15 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 
                npv = TAILQ_NEXT(pv, pv_plist);
                TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
-               save_generation = ++pmap->pm_generation;
+               atomic_add_int(&pmap->pm_generation, 1);
+               save_generation = pmap->pm_generation;
 
                m->md.pv_list_count--;
                atomic_add_int(&m->object->agg_pv_list_count, -1);
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
+               vm_page_spin_unlock(m);
 
                pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
                free_pv_entry(pv);
@@ -3038,7 +3053,6 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                        npv = TAILQ_FIRST(&pmap->pm_pvlist);
                }
        }
-       lwkt_reltoken(&vm_token);
        if (pmap->pm_pteobj)
                vm_object_drop(pmap->pm_pteobj);
 }
@@ -3058,8 +3072,7 @@ pmap_testbit(vm_page_t m, int bit)
        if (TAILQ_FIRST(&m->md.pv_list) == NULL)
                return FALSE;
 
-       crit_enter();
-
+       vm_page_spin_lock(m);
        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
                /*
                 * if the bit being tested is the modified bit, then
@@ -3079,11 +3092,11 @@ pmap_testbit(vm_page_t m, int bit)
 #endif
                pte = pmap_pte(pv->pv_pmap, pv->pv_va);
                if (*pte & bit) {
-                       crit_exit();
+                       vm_page_spin_unlock(m);
                        return TRUE;
                }
        }
-       crit_exit();
+       vm_page_spin_unlock(m);
        return (FALSE);
 }
 
@@ -3103,12 +3116,11 @@ pmap_clearbit(vm_page_t m, int bit)
        if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
                return;
 
-       crit_enter();
-
        /*
         * Loop over all current mappings setting/clearing as appropos If
         * setting RO do we need to clear the VAC?
         */
+       vm_page_spin_lock(m);
        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
                /*
                 * don't write protect pager mappings
@@ -3175,7 +3187,7 @@ pmap_clearbit(vm_page_t m, int bit)
                        }
                }
        }
-       crit_exit();
+       vm_page_spin_unlock(m);
 }
 
 /*
@@ -3188,14 +3200,12 @@ pmap_page_protect(vm_page_t m, vm_prot_t prot)
 {
        /* JG NX support? */
        if ((prot & VM_PROT_WRITE) == 0) {
-               lwkt_gettoken(&vm_token);
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
                        pmap_clearbit(m, VPTE_RW);
                        vm_page_flag_clear(m, PG_WRITEABLE);
                } else {
                        pmap_remove_all(m);
                }
-               lwkt_reltoken(&vm_token);
        }
 }
 
@@ -3227,18 +3237,13 @@ pmap_ts_referenced(vm_page_t m)
        if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
                return (rtval);
 
-       crit_enter();
-       lwkt_gettoken(&vm_token);
+       vm_page_spin_lock(m);
 
        if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-
                pvf = pv;
-
                do {
                        pvn = TAILQ_NEXT(pv, pv_list);
-
                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-
                        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 
                        if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
@@ -3255,8 +3260,7 @@ pmap_ts_referenced(vm_page_t m)
                        }
                } while ((pv = pvn) != NULL && pv != pvf);
        }
-       lwkt_reltoken(&vm_token);
-       crit_exit();
+       vm_page_spin_unlock(m);
 
        return (rtval);
 }
@@ -3272,9 +3276,8 @@ pmap_is_modified(vm_page_t m)
 {
        boolean_t res;
 
-       lwkt_gettoken(&vm_token);
        res = pmap_testbit(m, VPTE_M);
-       lwkt_reltoken(&vm_token);
+
        return (res);
 }
 
@@ -3286,9 +3289,7 @@ pmap_is_modified(vm_page_t m)
 void
 pmap_clear_modify(vm_page_t m)
 {
-       lwkt_gettoken(&vm_token);
        pmap_clearbit(m, VPTE_M);
-       lwkt_reltoken(&vm_token);
 }
 
 /*
@@ -3299,9 +3300,7 @@ pmap_clear_modify(vm_page_t m)
 void
 pmap_clear_reference(vm_page_t m)
 {
-       lwkt_gettoken(&vm_token);
        pmap_clearbit(m, VPTE_A);
-       lwkt_reltoken(&vm_token);
 }
 
 /*
@@ -3357,7 +3356,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr)
        vm_page_t m;
        int val = 0;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
        ptep = pmap_pte(pmap, addr);
 
        if (ptep && (pte = *ptep) != 0) {
@@ -3396,7 +3395,8 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr)
                }
        }
 done:
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
+
        return val;
 }
 
@@ -3528,7 +3528,7 @@ pmap_pgscan(struct pmap_pgscan_info *pginfo)
        pt_entry_t *pte;
        int stop = 0;
 
-       lwkt_gettoken(&vm_token);
+       vm_object_hold(pmap->pm_pteobj);
 
        for (; sva < eva; sva = va_next) {
                if (stop)
@@ -3594,5 +3594,5 @@ pmap_pgscan(struct pmap_pgscan_info *pginfo)
                        }
                }
        }
-       lwkt_reltoken(&vm_token);
+       vm_object_drop(pmap->pm_pteobj);
 }
index d48b58a..38aa9c7 100644 (file)
@@ -46,6 +46,7 @@
 #include <machine/clock.h>
 #include <machine/globaldata.h>
 #include <machine/md_var.h>
+#include <machine/cothread.h>
 
 #include <sys/thread2.h>
 
@@ -54,8 +55,6 @@
 
 #define VKTIMER_FREQ   1000000 /* 1us granularity */
 
-static void vktimer_intr(void *dummy, struct intrframe *frame);
-
 int disable_rtc_set;
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
           CTLFLAG_RW, &disable_rtc_set, 0, "");
@@ -73,16 +72,18 @@ int wall_cmos_clock = 0;
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
     CTLFLAG_RD, &wall_cmos_clock, 0, "");
 
-static struct kqueue_info *kqueue_timer_info;
-
 static int cputimer_mib[16];
 static int cputimer_miblen;
+static cothread_t vktimer_cotd;
+static int vktimer_running;
+static struct timespec vktimer_ts;
 
 /*
  * SYSTIMER IMPLEMENTATION
  */
 static sysclock_t vkernel_timer_get_timecount(void);
 static void vkernel_timer_construct(struct cputimer *timer, sysclock_t oclock);
+static void vktimer_thread(cothread_t cotd);
 
 static struct cputimer vkernel_cputimer = {
         SLIST_ENTRY_INITIALIZER,
@@ -182,8 +183,68 @@ static void
 vktimer_intr_initclock(struct cputimer_intr *cti __unused,
                       boolean_t selected __unused)
 {
+       vktimer_ts.tv_nsec = 1000000000 / 20;
+       vktimer_cotd = cothread_create(vktimer_thread, NULL, NULL, "vktimer");
+       while (vktimer_running == 0)
+               usleep(1000000 / 10);
+#if 0
        KKASSERT(kqueue_timer_info == NULL);
        kqueue_timer_info = kqueue_add_timer(vktimer_intr, NULL);
+#endif
+}
+
+/*
+ * Empty SIGINT handler for the vktimer cothread.  Its only purpose is
+ * to interrupt a blocking cothread_sleep() so the timer thread can pick
+ * up a newly-shortened timeout; installed with SA_NODEFER and no body.
+ */
+static void
+vktimer_sigint(int signo)
+{
+       /* do nothing, just interrupt */
+}
+
+static void
+vktimer_thread(cothread_t cotd)
+{
+        struct sigaction sa;
+       globaldata_t gscan;
+
+        bzero(&sa, sizeof(sa));
+        sa.sa_handler = vktimer_sigint;
+        sa.sa_flags |= SA_NODEFER;
+        sigemptyset(&sa.sa_mask);
+        sigaction(SIGINT, &sa, NULL);
+
+       vktimer_running = 1;
+       while (vktimer_cotd == NULL)
+               usleep(1000000 / 10);
+
+       for (;;) {
+               long old;
+               int n;
+
+               /*
+                * Wait for timeout or interrupt
+                */
+               cothread_sleep(cotd, &vktimer_ts);
+
+               /*
+                * Reinitialize with long timeout
+                */
+               old = 1000000000 / 20;
+               vktimer_ts.tv_nsec = old;
+
+               /*
+                * Poll cpus
+                *
+                * XXX we haven't distributed the timer to each cpu
+                * yet.
+                */
+               for (n = 0; n < ncpus; ++n) {
+                       gscan = globaldata_find(n);
+                       if (TAILQ_FIRST(&gscan->gd_systimerq))
+                               pthread_kill(ap_tids[n], SIGURG);
+               }
+       }
 }
 
 /*
@@ -194,38 +255,31 @@ vktimer_intr_initclock(struct cputimer_intr *cti __unused,
 static void
 vktimer_intr_reload(struct cputimer_intr *cti __unused, sysclock_t reload)
 {
-       if (kqueue_timer_info) {
-               if ((int)reload < 1)
-                       reload = 1;
-               kqueue_reload_timer(kqueue_timer_info, (reload + 999) / 1000);
+       if (reload >= 1000000)
+               reload = 999999999;
+       else
+               reload = reload * 1000;
+
+       if (reload < vktimer_ts.tv_nsec) {
+               while (reload < vktimer_ts.tv_nsec)
+                       reload = atomic_swap_long(&vktimer_ts.tv_nsec, reload);
+               if (vktimer_cotd)
+                       cothread_wakeup(vktimer_cotd, &vktimer_ts);
        }
 }
 
 /*
- * clock interrupt.
- *
- * NOTE: frame is a struct intrframe pointer.
+ * pcpu clock interrupt (hard interrupt)
  */
-static void
-vktimer_intr(void *dummy, struct intrframe *frame)
+void
+vktimer_intr(struct intrframe *frame)
 {
-       static sysclock_t sysclock_count;
        struct globaldata *gd = mycpu;
-        struct globaldata *gscan;
-       int n;
+       sysclock_t sysclock_count;
 
        sysclock_count = sys_cputimer->count();
-       for (n = 0; n < ncpus; ++n) {
-               gscan = globaldata_find(n);
-               if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
-                       continue;
-               if (gscan != gd) {
-                       lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
-                                       &sysclock_count, 0);
-               } else {
-                       systimer_intr(&sysclock_count, 0, frame);
-               }
-       }
+       ++gd->gd_cnt.v_timer;
+       systimer_intr(&sysclock_count, 0, frame);
 }
 
 /*
index 541b473..0154f32 100644 (file)
@@ -80,6 +80,7 @@ ipisig(int nada, siginfo_t *info, void *ctxp)
 
        if (td->td_critcount == 0) {
                ++td->td_critcount;
+               ++gd->gd_cnt.v_ipi;
                ++gd->gd_intr_nesting_level;
                atomic_swap_int(&gd->gd_npoll, 0);
                lwkt_process_ipiq();
@@ -112,6 +113,7 @@ stopsig(int nada, siginfo_t *info, void *ctxp)
        sigemptyset(&ss);
        sigaddset(&ss, SIGALRM);
        sigaddset(&ss, SIGIO);
+       sigaddset(&ss, SIGURG);
        sigaddset(&ss, SIGQUIT);
        sigaddset(&ss, SIGUSR1);
        sigaddset(&ss, SIGUSR2);
@@ -127,19 +129,52 @@ stopsig(int nada, siginfo_t *info, void *ctxp)
        --td->td_critcount;
 }
 
-#if 0
-
 /*
  * SIGIO is used by cothreads to signal back into the virtual kernel.
  */
 static
 void
-iosig(int nada, siginfo_t *info, void *ctxp)
+kqueuesig(int nada, siginfo_t *info, void *ctxp)
 {
-       signalintr(4);
+       globaldata_t gd = mycpu;
+       thread_t td = gd->gd_curthread;
+
+       if (td->td_critcount == 0) {
+               ++td->td_critcount;
+               ++gd->gd_intr_nesting_level;
+               kqueue_intr(NULL);
+               --gd->gd_intr_nesting_level;
+               --td->td_critcount;
+       } else {
+               need_kqueue();
+       }
 }
 
-#endif
+static
+void
+timersig(int nada, siginfo_t *info, void *ctxp)
+{
+       globaldata_t gd = mycpu;
+       thread_t td = gd->gd_curthread;
+
+       if (td->td_critcount == 0) {
+               ++td->td_critcount;
+               ++gd->gd_intr_nesting_level;
+               vktimer_intr(NULL);
+               --gd->gd_intr_nesting_level;
+               --td->td_critcount;
+       } else {
+               need_timer();
+       }
+}
+
+static
+void
+cosig(int nada, siginfo_t *info, void *ctxp)
+{
+	/*
+	 * SIGALRM is used for cothread signalling; defer the work through
+	 * signalintr() so critical-section state is honored (NOTE(review):
+	 * confirm signalintr(1) is the intended vector here).
+	 */
+       signalintr(1);
+}
 
 static
 void
@@ -177,12 +212,19 @@ init_exceptions(void)
 #endif
        sa.sa_sigaction = ipisig;
        sigaction(SIGUSR1, &sa, NULL);
+
        sa.sa_sigaction = stopsig;
        sigaction(SIGXCPU, &sa, NULL);
-#if 0
-       sa.sa_sigaction = iosig;
+
+       sa.sa_sigaction = kqueuesig;
        sigaction(SIGIO, &sa, NULL);
-#endif
+
+       sa.sa_sigaction = timersig;
+       sigaction(SIGURG, &sa, NULL);
+
+       sa.sa_sigaction = cosig;
+       sigaction(SIGALRM, &sa, NULL);
+
        sa.sa_sigaction = infosig;
        sigaction(SIGINFO, &sa, NULL);
 }
index 4487834..56455b5 100644 (file)
@@ -71,7 +71,6 @@ cpumask_t     smp_active_mask = CPUMASK_INITIALIZER_ONLYONE;
 static int     boot_address;
 /* which cpus have been started */
 static cpumask_t smp_startup_mask = CPUMASK_INITIALIZER_ONLYONE;
-int            mp_naps;                /* # of Applications processors */
 static int  mp_finish;
 
 /* Local data for detecting CPU TOPOLOGY */
@@ -145,6 +144,8 @@ start_ap(void *arg __unused)
 /* storage for AP thread IDs */
 pthread_t ap_tids[MAXCPU];
 
+int naps;
+
 void
 mp_start(void)
 {
@@ -152,8 +153,7 @@ mp_start(void)
        int shift;
 
        ncpus = optcpus;
-
-       mp_naps = ncpus - 1;
+       naps = ncpus - 1;
 
        /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
        for (shift = 0; (1 << shift) <= ncpus; ++shift)
@@ -192,7 +192,7 @@ mp_announce(void)
        kprintf("DragonFly/MP: Multiprocessor\n");
        kprintf(" cpu0 (BSP)\n");
 
-       for (x = 1; x <= mp_naps; ++x)
+       for (x = 1; x <= naps; ++x)
                kprintf(" cpu%d (AP)\n", x);
 }
 
@@ -405,8 +405,7 @@ start_all_aps(u_int boot_addr)
        pthread_attr_init(&attr);
 
        vm_object_hold(&kernel_object);
-       for (x = 1; x <= mp_naps; x++)
-       {
+       for (x = 1; x <= naps; ++x) {
                /* Allocate space for the CPU's private space. */
                for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) {
                        va =(vm_offset_t)&CPU_prvspace[x].mdglobaldata + i;
@@ -439,7 +438,7 @@ start_all_aps(u_int boot_addr)
                 gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1;
 #endif
 
-               ipiq_size = sizeof(struct lwkt_ipiq) * (mp_naps + 1);
+               ipiq_size = sizeof(struct lwkt_ipiq) * (naps + 1);
                 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size,
                                                    VM_SUBSYS_IPIQ);
                 bzero(gd->mi.gd_ipiq, ipiq_size);
@@ -487,7 +486,6 @@ start_all_aps(u_int boot_addr)
 /*
  * CPU TOPOLOGY DETECTION FUNCTIONS.
  */
-
 void
 detect_cpu_topology(void)
 {
@@ -503,16 +501,21 @@ get_chip_ID(int cpuid)
 }
 
 int
+get_chip_ID_from_APICID(int apicid)
+{
+        return apicid >> (logical_CPU_bits + core_bits);
+}
+
+int
 get_core_number_within_chip(int cpuid)
 {
-       return (get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) &
-           ( (1 << core_bits) -1);
+       return ((get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) &
+               ((1 << core_bits) - 1));
 }
 
 int
 get_logical_CPU_number_within_core(int cpuid)
 {
-       return get_apicid_from_cpuid(cpuid) &
-           ( (1 << logical_CPU_bits) -1);
+       return (get_apicid_from_cpuid(cpuid) &
+               ((1 << logical_CPU_bits) - 1));
 }
-
index 87be810..9bcc7a4 100644 (file)
@@ -1378,6 +1378,7 @@ void
 go_user(struct intrframe *frame)
 {
        struct trapframe *tf = (void *)&frame->if_rdi;
+       globaldata_t gd;
        int r;
        void *id;
 
@@ -1429,7 +1430,15 @@ go_user(struct intrframe *frame)
                else
                        id = &curproc->p_vmspace->vm_pmap;
 
-               r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf, &curthread->td_savevext);
+               /*
+                * The GDF_VIRTUSER hack helps statclock() figure out who
+                * the tick belongs to.
+                */
+               gd = mycpu;
+               gd->gd_flags |= GDF_VIRTUSER;
+               r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf,
+                               &curthread->td_savevext);
+               gd->gd_flags &= ~GDF_VIRTUSER;
 
                frame->if_xflags |= PGEX_U;
 #if 0
index 8bba2af..2fc0546 100644 (file)
@@ -202,6 +202,7 @@ typedef struct globaldata *globaldata_t;
 #define RQB_RUNNING            8       /* 0100 */
 #define RQB_SPINNING           9       /* 0200 */
 #define RQB_QUICKRET           10      /* 0400 */
+#define RQB_KQUEUE             11      /* 0800 (only used by vkernel) */
 
 #define RQF_IPIQ               (1 << RQB_IPIQ)
 #define RQF_INTPEND            (1 << RQB_INTPEND)
@@ -213,16 +214,18 @@ typedef struct globaldata *globaldata_t;
 #define RQF_RUNNING            (1 << RQB_RUNNING)
 #define RQF_SPINNING           (1 << RQB_SPINNING)
 #define RQF_QUICKRET           (1 << RQB_QUICKRET)
+#define RQF_KQUEUE             (1 << RQB_KQUEUE)
 
 #define RQF_AST_MASK           (RQF_AST_OWEUPC|RQF_AST_SIGNAL|\
                                RQF_AST_USER_RESCHED|RQF_AST_LWKT_RESCHED)
-#define RQF_IDLECHECK_MASK     (RQF_IPIQ|RQF_INTPEND|RQF_TIMER)
+#define RQF_IDLECHECK_MASK     (RQF_IPIQ|RQF_INTPEND|RQF_TIMER|RQF_KQUEUE)
 #define RQF_IDLECHECK_WK_MASK  (RQF_IDLECHECK_MASK|RQF_AST_LWKT_RESCHED)
 
 /*
  * globaldata flags
  */
 #define GDF_KPRINTF            0x0001  /* kprintf() reentrancy */
+#define GDF_VIRTUSER           0x0002  /* used by vkernel only */
 
 #endif