kernel - Optimize lwp-specific signaling.
[dragonfly.git] / sys / kern / sys_vmm.c
1 /*
2  * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Mihai Carabas <mihai.carabas@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/sysproto.h>
38 #include <sys/proc.h>
39 #include <sys/user.h>
40 #include <sys/wait.h>
41 #include <sys/vmm.h>
42
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45
46 #include <machine/cpu.h>
47 #include <machine/vmm.h>
48
49 /*
50  * vmm guest system call:
51  * - init the calling thread structure
52  * - prepare for running in non-root mode
53  */
54 int
55 sys_vmm_guest_ctl(struct vmm_guest_ctl_args *uap)
56 {
57         int error = 0;
58         struct vmm_guest_options options;
59         struct trapframe *tf = uap->sysmsg_frame;
60         unsigned long stack_limit = USRSTACK;
61         unsigned char stack_page[PAGE_SIZE];
62
63         clear_quickret();
64
65         switch (uap->op) {
66         case VMM_GUEST_RUN:
67                 error = copyin(uap->options, &options,
68                                sizeof(struct vmm_guest_options));
69                 if (error) {
70                         kprintf("%s: error copyin vmm_guest_options\n",
71                                 __func__);
72                         goto out;
73                 }
74
75                 while(stack_limit > tf->tf_sp) {
76                         stack_limit -= PAGE_SIZE;
77                         options.new_stack -= PAGE_SIZE;
78
79                         error = copyin((const void *)stack_limit,
80                                        (void *)stack_page, PAGE_SIZE);
81                         if (error) {
82                                 kprintf("%s: error copyin stack\n",
83                                         __func__);
84                                 goto out;
85                         }
86
87                         error = copyout((const void *)stack_page,
88                                         (void *)options.new_stack, PAGE_SIZE);
89                         if (error) {
90                                 kprintf("%s: error copyout stack\n",
91                                     __func__);
92                                 goto out;
93                         }
94                 }
95
96                 bcopy(tf, &options.tf, sizeof(struct trapframe));
97
98                 error = vmm_vminit(&options);
99                 if (error) {
100                         if (error == ENODEV) {
101                                 kprintf("%s: vmm_vminit failed - "
102                                         "no VMM available \n", __func__);
103                                 goto out;
104                         }
105                         kprintf("%s: vmm_vminit failed\n", __func__);
106                         goto out_exit;
107                 }
108
109                 generic_lwp_return(curthread->td_lwp, tf);
110
111                 error = vmm_vmrun();
112
113                 break;
114         default:
115                 kprintf("%s: INVALID op\n", __func__);
116                 error = EINVAL;
117                 goto out;
118         }
119 out_exit:
120         exit1(W_EXITCODE(error, 0));
121 out:
122         return (error);
123 }
124
125 /*
126  * The remote IPI will force the cpu out of any VMM mode it is
127  * in.  When combined with bumping pm_invgen we can ensure that
128  * INVEPT will be called when it returns.
129  */
130 static void
131 vmm_exit_vmm(void *dummy __unused)
132 {
133 }
134
135 int
136 sys_vmm_guest_sync_addr(struct vmm_guest_sync_addr_args *uap)
137 {
138         int error = 0;
139         cpulock_t olock;
140         cpulock_t nlock;
141         cpumask_t mask;
142         long val;
143         struct proc *p = curproc;
144
145         if (p->p_vmm == NULL)
146                 return ENOSYS;
147
148         crit_enter_id("vmm_inval");
149
150         /*
151          * Acquire CPULOCK_EXCL, spin while we wait.  This will prevent
152          * any other cpu trying to use related VMMs to wait for us.
153          */
154         KKASSERT(CPUMASK_TESTMASK(p->p_vmm_cpumask, mycpu->gd_cpumask) == 0);
155         for (;;) {
156                 olock = p->p_vmm_cpulock & ~CPULOCK_EXCL;
157                 cpu_ccfence();
158                 nlock = olock | CPULOCK_EXCL;
159                 if (atomic_cmpset_int(&p->p_vmm_cpulock, olock, nlock))
160                         break;
161                 lwkt_process_ipiq();
162                 cpu_pause();
163         }
164
165         /*
166          * Wait for other cpu's to exit VMM mode (for this vkernel).  No
167          * new cpus will enter VMM mode while we hold the lock.  New waiters
168          * may turn-up though so the wakeup() later on has to be
169          * unconditional.
170          *
171          * We must test on p_vmm_cpulock's counter, not the mask, because
172          * VMM entries will set the mask bit unconditionally first
173          * (interlocking our IPI below) and then conditionally bump the
174          * counter.
175          */
176         if (olock & CPULOCK_CNTMASK) {
177                 mask = p->p_vmm_cpumask;
178                 CPUMASK_ANDMASK(mask, mycpu->gd_other_cpus);
179                 lwkt_send_ipiq_mask(mask, vmm_exit_vmm, NULL);
180                 while (p->p_vmm_cpulock & CPULOCK_CNTMASK) {
181                         lwkt_process_ipiq();
182                         cpu_pause();
183                 }
184         }
185
186 #ifndef _KERNEL_VIRTUAL
187         /*
188          * Ensure that any new entries into VMM mode using
189          * vmm's managed under this process will issue a
190          * INVEPT before resuming.
191          */
192         atomic_add_acq_long(&p->p_vmspace->vm_pmap.pm_invgen, 1);
193 #endif
194
195         /*
196          * Make the requested modification, wakeup any waiters.
197          */
198         if (uap->srcaddr) {
199                 copyin(uap->srcaddr, &val, sizeof(long));
200                 copyout(&val, uap->dstaddr, sizeof(long));
201         }
202
203         /*
204          * VMMs on remote cpus will not be re-entered until we
205          * clear the lock.
206          */
207         atomic_clear_int(&p->p_vmm_cpulock, CPULOCK_EXCL);
208 #if 0
209         wakeup(&p->p_vmm_cpulock);
210 #endif
211
212         crit_exit_id("vmm_inval");
213
214         return error;
215 }