Merge remote-tracking branch 'origin/vendor/LIBEDIT'
[dragonfly.git] / sys / platform / pc64 / vmm / ept.c
1 /*
2  * Copyright (c) 2003-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Mihai Carabas <mihai.carabas@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/systm.h>
36 #include <sys/sfbuf.h>
37 #include <sys/proc.h>
38 #include <sys/thread.h>
39
40 #include <machine/pmap.h>
41 #include <machine/specialreg.h>
42 #include <machine/cpufunc.h>
43 #include <machine/vmm.h>
44
45 #include <vm/vm_extern.h>
46 #include <vm/vm_map.h>
47
48 #include "vmx.h"
49 #include "ept.h"
50 #include "vmm_utils.h"
51 #include "vmm.h"
52
53 static uint64_t pmap_bits_ept[PG_BITS_SIZE];
54 static pt_entry_t pmap_cache_bits_ept[PAT_INDEX_SIZE];
55 static uint64_t ept_protection_codes[PROTECTION_CODES_SIZE];
56 static pt_entry_t pmap_cache_mask_ept;
57
58 static int pmap_pm_flags_ept = PMAP_HVM;
59 static int eptp_bits;
60
61 extern uint64_t vmx_ept_vpid_cap;
62
/*
 * Probe EPT capabilities from the IA32_VMX_EPT_VPID_CAP MSR and build the
 * static translation tables (pmap bits, cache bits, protection codes) that
 * vmx_ept_pmap_pinit() later copies into each EPT-backed pmap.
 *
 * Returns 0 on success, or EINVAL if the CPU does not support a 4-level
 * EPT page walk with write-back memory type.
 */
int
vmx_ept_init(void)
{
	int prot;
	/* Chapter 28 VMX SUPPORT FOR ADDRESS TRANSLATION
	 * Intel Manual 3c, page 107
	 */
	vmx_ept_vpid_cap = rdmsr(IA32_VMX_EPT_VPID_CAP);

	/* Require a 4-level page walk and write-back EPT structures. */
	if(!EPT_PWL4(vmx_ept_vpid_cap)||
	    !EPT_MEMORY_TYPE_WB(vmx_ept_vpid_cap)) {
		return EINVAL;
	}

	/* EPTP feature bits shared by all EPT pointers built later. */
	eptp_bits |= EPTP_CACHE(PAT_WRITE_BACK) |
	    EPTP_PWLEN(EPT_PWLEVELS - 1);

	/* Use hardware Accessed/Dirty bits when supported; otherwise ask
	 * the pmap layer to emulate them in software. */
	if (EPT_AD_BITS_SUPPORTED(vmx_ept_vpid_cap)) {
		eptp_bits |= EPTP_AD_ENABLE;
	} else {
		pmap_pm_flags_ept |= PMAP_EMULATE_AD_BITS;
	}

	/* Initialize EPT bits
	 * - for PG_V - set READ and EXECUTE to preserve compatibility
	 * - for PG_U and PG_G - set 0 to preserve compatiblity
	 * - for PG_N - set the Uncacheable bit
	 */
	pmap_bits_ept[TYPE_IDX] = EPT_PMAP;
	pmap_bits_ept[PG_V_IDX] = EPT_PG_READ | EPT_PG_EXECUTE;
	pmap_bits_ept[PG_RW_IDX] = EPT_PG_WRITE;
	pmap_bits_ept[PG_PS_IDX] = EPT_PG_PS;
	pmap_bits_ept[PG_G_IDX] = 0;
	pmap_bits_ept[PG_U_IDX] = 0;
	pmap_bits_ept[PG_A_IDX] = EPT_PG_A;
	pmap_bits_ept[PG_M_IDX] = EPT_PG_M;
	pmap_bits_ept[PG_W_IDX] = EPT_PG_AVAIL1;
	pmap_bits_ept[PG_MANAGED_IDX] = EPT_PG_AVAIL2;
	pmap_bits_ept[PG_DEVICE_IDX] = EPT_PG_AVAIL3;
	pmap_bits_ept[PG_N_IDX] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;
	pmap_bits_ept[PG_NX_IDX] = 0;	/* XXX inverted sense */

	pmap_cache_mask_ept = EPT_IGNORE_PAT | EPT_MEM_TYPE_MASK;

	/* Map each PAT index to the matching EPT memory-type bits. */
	pmap_cache_bits_ept[PAT_UNCACHEABLE] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;
	pmap_cache_bits_ept[PAT_WRITE_COMBINING] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WC;
	pmap_cache_bits_ept[PAT_WRITE_THROUGH] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WT;
	pmap_cache_bits_ept[PAT_WRITE_PROTECTED] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WP;
	pmap_cache_bits_ept[PAT_WRITE_BACK] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WB;
	pmap_cache_bits_ept[PAT_UNCACHED] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;

	/* Only the write bit needs an explicit protection code: READ and
	 * EXECUTE are already granted through PG_V above. */
	for (prot = 0; prot < PROTECTION_CODES_SIZE; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			ept_protection_codes[prot] = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			ept_protection_codes[prot] = pmap_bits_ept[PG_RW_IDX];

			break;
		}
	}

	return 0;
}
134
135 /* Build the VMCS_EPTP pointer
136  * - the ept_address
137  * - the EPTP bits indicating optional features
138  */
139 uint64_t vmx_eptp(uint64_t ept_address)
140 {
141         return (ept_address | eptp_bits);
142 }
143
/*
 * Copyin from guest VMM.
 *
 * Copy len bytes from the guest virtual address udaddr into the kernel
 * buffer kaddr, one page fragment at a time.  udaddr is translated to a
 * guest physical address by walking the guest page tables, and the page
 * backing that GPA is faulted into the host vm_map before copying.
 *
 * Returns 0 on success or EFAULT on a translation or fault failure
 * (a partial copy may already have been performed).
 */
static int
ept_copyin(const void *udaddr, void *kaddr, size_t len)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;

	error = 0;

	while (len) {
		/* Get the GPA by manually walking the GUEST page table
		 * rooted at guest_cr3. */
		error = guest_phys_addr(vm, &gpa, guest_cr3,
					(vm_offset_t)udaddr);
		if (error) {
			kprintf("%s: could not get guest_phys_addr\n",
				__func__);
			break;
		}

		/* Fault in and hold the backing page (NULL busy arg). */
		m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
				  VM_PROT_READ, VM_FAULT_NORMAL,
				  &error, NULL);
		if (error) {
			if (vmm_debug) {
				kprintf("%s: could not fault in "
					"vm map, gpa: %jx\n",
					__func__,
					(uintmax_t)gpa);
			}
			break;
		}

		/* Copy at most to the end of the current page. */
		n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK);
		if (n > len)
			n = len;

		/* Temporary KVA mapping of the page for the bcopy. */
		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy((char *)lwbuf_kva(lwb) +
		       ((vm_offset_t)udaddr & PAGE_MASK),
		      kaddr, n);
		len -= n;
		udaddr = (const char *)udaddr + n;
		kaddr = (char *)kaddr + n;
		lwbuf_free(lwb);
		vm_page_unhold(m);
	}
	if (error)
		error = EFAULT;
	return (error);
}
201
/*
 * Copyout from guest VMM.
 *
 * Copy len bytes from the kernel buffer kaddr to the guest virtual
 * address udaddr, one page fragment at a time.  udaddr is translated to
 * a guest physical address by walking the guest page tables; the backing
 * page is faulted in writable and marked dirty after the copy.
 *
 * Returns 0 on success or EFAULT on a translation or fault failure
 * (a partial copy may already have been performed).
 */
static int
ept_copyout(const void *kaddr, void *udaddr, size_t len)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;

	error = 0;

	while (len) {
		int busy;

		/* Get the GPA by manually walking the GUEST page table
		 * rooted at guest_cr3. */
		error = guest_phys_addr(vm, &gpa, guest_cr3,
					(vm_offset_t)udaddr);
		if (error) {
			kprintf("%s: could not get guest_phys_addr\n",
				__func__);
			break;
		}

		/* Fault in the page writable; vm_fault_page tells us via
		 * busy whether it returned the page busied or just held. */
		m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
				  VM_PROT_READ | VM_PROT_WRITE,
				  VM_FAULT_NORMAL,
				  &error, &busy);
		if (error) {
			if (vmm_debug) {
				kprintf("%s: could not fault in vm map, "
					"gpa: 0x%jx\n",
					__func__,
					(uintmax_t)gpa);
			}
			break;
		}

		/* Copy at most to the end of the current page. */
		n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK);
		if (n > len)
			n = len;

		/* Temporary KVA mapping of the page for the bcopy. */
		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy(kaddr, (char *)lwbuf_kva(lwb) +
			     ((vm_offset_t)udaddr & PAGE_MASK), n);

		len -= n;
		udaddr = (char *)udaddr + n;
		kaddr = (const char *)kaddr + n;
		/* We wrote through the KVA alias; mark the page dirty. */
		vm_page_dirty(m);
#if 0
		/* should not be needed */
		cpu_invlpg((char *)lwbuf_kva(lwb) +
			     ((vm_offset_t)udaddr & PAGE_MASK));
#endif
		lwbuf_free(lwb);
		/* Release the page in whichever way it was returned. */
		if (busy)
			vm_page_wakeup(m);
		else
			vm_page_unhold(m);
	}
	if (error)
		error = EFAULT;
	return (error);
}
271
272 static int
273 ept_copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *res)
274 {
275         int error;
276         size_t n;
277         const char *uptr = udaddr;
278         char *kptr = kaddr;
279
280         if (res)
281                 *res = 0;
282         while (len) {
283                 n = PAGE_SIZE - ((vm_offset_t)uptr & PAGE_MASK);
284                 if (n > 32)
285                         n = 32;
286                 if (n > len)
287                         n = len;
288                 if ((error = ept_copyin(uptr, kptr, n)) != 0)
289                         return(error);
290                 while (n) {
291                         if (res)
292                                 ++*res;
293                         if (*kptr == 0)
294                                 return(0);
295                         ++kptr;
296                         ++uptr;
297                         --n;
298                         --len;
299                 }
300
301         }
302         return(ENAMETOOLONG);
303 }
304
305
306 static int
307 ept_fubyte(const uint8_t *base)
308 {
309         uint8_t c = 0;
310
311         if (ept_copyin(base, &c, 1) == 0)
312                 return((int)c);
313         return(-1);
314 }
315
316 static int
317 ept_subyte(uint8_t *base, uint8_t byte)
318 {
319         unsigned char c = byte;
320
321         if (ept_copyout(&c, base, 1) == 0)
322                 return(0);
323         return(-1);
324 }
325
326 static int32_t
327 ept_fuword32(const uint32_t *base)
328 {
329         uint32_t v;
330
331         if (ept_copyin(base, &v, sizeof(v)) == 0)
332                 return(v);
333         return(-1);
334 }
335
336 static int64_t
337 ept_fuword64(const uint64_t *base)
338 {
339         uint64_t v;
340
341         if (ept_copyin(base, &v, sizeof(v)) == 0)
342                 return(v);
343         return(-1);
344 }
345
346 static int
347 ept_suword64(uint64_t *base, uint64_t word)
348 {
349         if (ept_copyout(&word, base, sizeof(word)) == 0)
350                 return(0);
351         return(-1);
352 }
353
354 static int
355 ept_suword32(uint32_t *base, int word)
356 {
357         if (ept_copyout(&word, base, sizeof(word)) == 0)
358                 return(0);
359         return(-1);
360 }
361
/*
 * Atomically swap the 32-bit word at guest virtual address uaddr with v,
 * returning the previous value.
 *
 * NOTE(review): error reporting is in-band.  On translation/fault failure
 * EFAULT is returned as the value, and a word straddling a page boundary
 * yields (uint32_t)-EFAULT; callers cannot distinguish these sentinels
 * from legitimate guest data.
 */
static uint32_t
ept_swapu32(volatile uint32_t *uaddr, uint32_t v)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;
	volatile void *ptr;
	int busy;

	/* Get the GPA by manually walking the GUEST page table. */
	error = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)uaddr);
	if (error) {
		kprintf("%s: could not get guest_phys_addr\n", __func__);
		return EFAULT;
	}
	/* Fault in the page writable; busy says how to release it. */
	m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
			  VM_PROT_READ | VM_PROT_WRITE,
			  VM_FAULT_NORMAL,
			  &error, &busy);
	if (error) {
		if (vmm_debug) {
			kprintf("%s: could not fault in vm map, gpa: %llx\n",
				__func__, (unsigned long long) gpa);
		}
		return EFAULT;
	}

	/* The word must not cross a page boundary. */
	n = PAGE_SIZE - ((vm_offset_t)uaddr & PAGE_MASK);
	if (n < sizeof(uint32_t)) {
		error = EFAULT;
		v = (uint32_t)-error;
		goto done;
	}

	/* Perform the swap through a temporary KVA mapping. */
	lwb = lwbuf_alloc(m, &lwb_cache);
	ptr = (void *)(lwbuf_kva(lwb) + ((vm_offset_t)uaddr & PAGE_MASK));
	v = atomic_swap_int(ptr, v);

	vm_page_dirty(m);
	lwbuf_free(lwb);
	error = 0;
done:
	/* Release the page in whichever way vm_fault_page returned it. */
	if (busy)
		vm_page_wakeup(m);
	else
		vm_page_unhold(m);
	return v;
}
416
/*
 * Atomically swap the 64-bit word at guest virtual address uaddr with v,
 * returning the previous value.
 *
 * NOTE(review): error reporting is in-band.  On translation/fault failure
 * EFAULT is returned as the value, and a word straddling a page boundary
 * yields (uint64_t)-EFAULT; callers cannot distinguish these sentinels
 * from legitimate guest data.
 */
static uint64_t
ept_swapu64(volatile uint64_t *uaddr, uint64_t v)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;
	volatile void *ptr;
	int busy;

	/* Get the GPA by manually walking the GUEST page table. */
	error = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)uaddr);
	if (error) {
		kprintf("%s: could not get guest_phys_addr\n", __func__);
		return EFAULT;
	}
	/* Fault in the page writable; busy says how to release it. */
	m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
			  VM_PROT_READ | VM_PROT_WRITE,
			  VM_FAULT_NORMAL,
			  &error, &busy);
	if (error) {
		if (vmm_debug) {
			kprintf("%s: could not fault in vm map, gpa: %llx\n",
				__func__, (unsigned long long) gpa);
		}
		return EFAULT;
	}

	/* The word must not cross a page boundary. */
	n = PAGE_SIZE - ((vm_offset_t)uaddr & PAGE_MASK);
	if (n < sizeof(uint64_t)) {
		error = EFAULT;
		v = (uint64_t)-error;
		goto done;
	}

	/* Perform the swap through a temporary KVA mapping. */
	lwb = lwbuf_alloc(m, &lwb_cache);
	ptr = (void *)(lwbuf_kva(lwb) + ((vm_offset_t)uaddr & PAGE_MASK));
	v = atomic_swap_long(ptr, v);

	vm_page_dirty(m);
	lwbuf_free(lwb);
	error = 0;
done:
	/* Release the page in whichever way vm_fault_page returned it. */
	if (busy)
		vm_page_wakeup(m);
	else
		vm_page_unhold(m);
	return v;
}
471
/*
 * Atomically add v to the 32-bit word at guest virtual address uaddr,
 * returning the previous value.
 *
 * NOTE(review): error reporting is in-band.  On translation/fault failure
 * EFAULT is returned as the value, and a word straddling a page boundary
 * yields (uint32_t)-EFAULT; callers cannot distinguish these sentinels
 * from legitimate guest data.
 */
static uint32_t
ept_fuwordadd32(volatile uint32_t *uaddr, uint32_t v)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;
	volatile void *ptr;
	int busy;

	/* Get the GPA by manually walking the GUEST page table. */
	error = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)uaddr);
	if (error) {
		kprintf("%s: could not get guest_phys_addr\n", __func__);
		return EFAULT;
	}
	/* Fault in the page writable; busy says how to release it. */
	m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
			  VM_PROT_READ | VM_PROT_WRITE,
			  VM_FAULT_NORMAL,
			  &error, &busy);
	if (error) {
		if (vmm_debug) {
			kprintf("%s: could not fault in vm map, gpa: %llx\n",
				__func__, (unsigned long long) gpa);
		}
		return EFAULT;
	}

	/* The word must not cross a page boundary. */
	n = PAGE_SIZE - ((vm_offset_t)uaddr & PAGE_MASK);
	if (n < sizeof(uint32_t)) {
		error = EFAULT;
		v = (uint32_t)-error;
		goto done;
	}

	/* Perform the fetch-add through a temporary KVA mapping. */
	lwb = lwbuf_alloc(m, &lwb_cache);
	ptr = (void *)(lwbuf_kva(lwb) + ((vm_offset_t)uaddr & PAGE_MASK));
	v = atomic_fetchadd_int(ptr, v);

	vm_page_dirty(m);
	lwbuf_free(lwb);
	error = 0;
done:
	/* Release the page in whichever way vm_fault_page returned it. */
	if (busy)
		vm_page_wakeup(m);
	else
		vm_page_unhold(m);
	return v;
}
526
/*
 * Atomically add v to the 64-bit word at guest virtual address uaddr,
 * returning the previous value.
 *
 * NOTE(review): error reporting is in-band.  On translation/fault failure
 * EFAULT is returned as the value, and a word straddling a page boundary
 * yields (uint64_t)-EFAULT; callers cannot distinguish these sentinels
 * from legitimate guest data.
 */
static uint64_t
ept_fuwordadd64(volatile uint64_t *uaddr, uint64_t v)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int error;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;
	volatile void *ptr;
	int busy;

	/* Get the GPA by manually walking the GUEST page table. */
	error = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)uaddr);
	if (error) {
		kprintf("%s: could not get guest_phys_addr\n", __func__);
		return EFAULT;
	}
	/* Fault in the page writable; busy says how to release it. */
	m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
			  VM_PROT_READ | VM_PROT_WRITE,
			  VM_FAULT_NORMAL,
			  &error, &busy);
	if (error) {
		if (vmm_debug) {
			kprintf("%s: could not fault in vm map, gpa: %llx\n",
				__func__, (unsigned long long) gpa);
		}
		return EFAULT;
	}

	/* The word must not cross a page boundary. */
	n = PAGE_SIZE - ((vm_offset_t)uaddr & PAGE_MASK);
	if (n < sizeof(uint64_t)) {
		error = EFAULT;
		v = (uint64_t)-error;
		goto done;
	}

	/* Perform the fetch-add through a temporary KVA mapping. */
	lwb = lwbuf_alloc(m, &lwb_cache);
	ptr = (void *)(lwbuf_kva(lwb) + ((vm_offset_t)uaddr & PAGE_MASK));
	v = atomic_fetchadd_long(ptr, v);

	vm_page_dirty(m);
	lwbuf_free(lwb);
	error = 0;
done:
	/* Release the page in whichever way vm_fault_page returned it. */
	if (busy)
		vm_page_wakeup(m);
	else
		vm_page_unhold(m);
	return v;
}
581
582 void
583 vmx_ept_pmap_pinit(pmap_t pmap)
584 {
585         pmap->pm_flags |= pmap_pm_flags_ept;
586
587         bcopy(pmap_bits_ept, pmap->pmap_bits, sizeof(pmap_bits_ept));
588         bcopy(ept_protection_codes, pmap->protection_codes,
589               sizeof(ept_protection_codes));
590         bcopy(pmap_cache_bits_ept, pmap->pmap_cache_bits,
591               sizeof(pmap_cache_bits_ept));
592         pmap->pmap_cache_mask = pmap_cache_mask_ept;
593         pmap->copyinstr = ept_copyinstr;
594         pmap->copyin = ept_copyin;
595         pmap->copyout = ept_copyout;
596         pmap->fubyte = ept_fubyte;
597         pmap->subyte = ept_subyte;
598         pmap->fuword32 = ept_fuword32;
599         pmap->fuword64 = ept_fuword64;
600         pmap->suword32 = ept_suword32;
601         pmap->suword64 = ept_suword64;
602         pmap->swapu32 = ept_swapu32;
603         pmap->swapu64 = ept_swapu64;
604         pmap->fuwordadd32 = ept_fuwordadd32;
605         pmap->fuwordadd64 = ept_fuwordadd64;
606 }