/*
 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/mman.h>

#include "nvmm.h"
#include "nvmm_internal.h"
#include "nvmm_ioctl.h"

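/*
 * Global machine table. Every slot is preallocated; a slot is claimed by
 * setting mach->present under the slot's write lock, and the nmachines
 * counter tracks how many slots are currently live.
 */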
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
        &nvmm_x86_svm,  /* x86 AMD SVM */
        &nvmm_x86_vmx   /* x86 Intel VMX */
#endif
};

const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

struct nvmm_owner nvmm_root_owner;

/* -------------------------------------------------------------------------- */

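/*
 * Machine slot helpers. nvmm_machine_alloc() and nvmm_machine_get() both
 * return with the slot lock held (write-locked for alloc; read- or
 * write-locked for get, depending on 'writer'). The caller releases it
 * with nvmm_machine_put().
 */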
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
        struct nvmm_machine *mach;
        size_t i;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                os_rwl_wlock(&mach->lock);
                if (mach->present) {
                        os_rwl_unlock(&mach->lock);
                        continue;
                }

                mach->present = true;
                mach->time = time_second;
                *ret = mach;
                os_atomic_inc_uint(&nmachines);
                return 0;
        }

        return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
        OS_ASSERT(os_rwl_wheld(&mach->lock));
        OS_ASSERT(mach->present);
        mach->present = false;
        os_atomic_dec_uint(&nmachines);
}

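/*
 * Look up a live machine by id, lock it, and check ownership. Access is
 * denied (EPERM) unless the caller owns the machine or is the privileged
 * nvmm_root_owner, which can reach any machine.
 */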
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
        struct nvmm_machine *mach;

        if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
                return EINVAL;
        }
        mach = &machines[machid];

        if (__predict_false(writer)) {
                os_rwl_wlock(&mach->lock);
        } else {
                os_rwl_rlock(&mach->lock);
        }
        if (__predict_false(!mach->present)) {
                os_rwl_unlock(&mach->lock);
                return ENOENT;
        }
        if (__predict_false(mach->owner != owner &&
            owner != &nvmm_root_owner)) {
                os_rwl_unlock(&mach->lock);
                return EPERM;
        }
        *ret = mach;

        return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
        os_rwl_unlock(&mach->lock);
}

/* -------------------------------------------------------------------------- */

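/*
 * VCPU slot helpers, mirroring the machine helpers above but with a
 * per-slot mutex: nvmm_vcpu_alloc() and nvmm_vcpu_get() return with
 * vcpu->lock held, and nvmm_vcpu_put() drops it.
 */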
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (cpuid >= NVMM_MAX_VCPUS) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        os_mtx_lock(&vcpu->lock);
        if (vcpu->present) {
                os_mtx_unlock(&vcpu->lock);
                return EBUSY;
        }

        vcpu->present = true;
        vcpu->comm = NULL;
        vcpu->hcpu_last = -1;
        *ret = vcpu;
        return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
        OS_ASSERT(os_mtx_owned(&vcpu->lock));
        vcpu->present = false;
        if (vcpu->comm != NULL) {
                os_vmobj_unmap(os_kernel_map, (vaddr_t)vcpu->comm,
                    (vaddr_t)vcpu->comm + NVMM_COMM_PAGE_SIZE, true);
                /*
                 * Require userland to unmap the comm page from its address
                 * space, because os_curproc_map at this point (fd close)
                 * is not guaranteed to be the correct address space.
                 */
        }
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        os_mtx_lock(&vcpu->lock);
        if (__predict_false(!vcpu->present)) {
                os_mtx_unlock(&vcpu->lock);
                return ENOENT;
        }
        *ret = vcpu;

        return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
        os_mtx_unlock(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

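/*
 * Destroy all machines belonging to 'owner': tear down each VCPU,
 * destroy the guest vmspace, and drop the host-mapping vmobj references.
 * Called when the owner goes away, e.g. on fd close.
 */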
void
nvmm_kill_machines(struct nvmm_owner *owner)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t i, j;
        int error;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                os_rwl_wlock(&mach->lock);
                if (!mach->present || mach->owner != owner) {
                        os_rwl_unlock(&mach->lock);
                        continue;
                }

                /* Kill it. */
                for (j = 0; j < NVMM_MAX_VCPUS; j++) {
                        error = nvmm_vcpu_get(mach, j, &vcpu);
                        if (error)
                                continue;
                        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                        nvmm_vcpu_free(mach, vcpu);
                        nvmm_vcpu_put(vcpu);
                        os_atomic_dec_uint(&mach->ncpus);
                }
                (*nvmm_impl->machine_destroy)(mach);
                os_vmspace_destroy(mach->vm);

                /* Drop the kernel vmobj refs. */
                for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
                        if (!mach->hmap[j].present)
                                continue;
                        os_vmobj_rel(mach->hmap[j].vmobj);
                }

                nvmm_machine_free(mach);

                os_rwl_unlock(&mach->lock);
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
        args->cap.version = NVMM_KERN_VERSION;
        args->cap.state_size = nvmm_impl->state_size;
        args->cap.comm_size = NVMM_COMM_PAGE_SIZE;
        args->cap.max_machines = NVMM_MAX_MACHINES;
        args->cap.max_vcpus = NVMM_MAX_VCPUS;
        args->cap.max_ram = NVMM_MAX_RAM;

        (*nvmm_impl->capability)(&args->cap);

        return 0;
}

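/*
 * Create a machine: claim a slot, give it a guest vmspace covering
 * [0, NVMM_MAX_RAM), create the vmobj backing the per-VCPU comm pages,
 * and let the backend (SVM or VMX) do its machine-level setup.
 */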
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_alloc(&mach);
        if (error)
                return error;

        /* Curproc owns the machine. */
        mach->owner = owner;

        /* Zero out the host mappings. */
        memset(&mach->hmap, 0, sizeof(mach->hmap));

        /* Create the machine vmspace. */
        mach->gpa_begin = 0;
        mach->gpa_end = NVMM_MAX_RAM;
        mach->vm = os_vmspace_create(mach->gpa_begin, mach->gpa_end);

#ifdef __DragonFly__
        /*
         * Set PMAP_MULTI on the machine's backing pmap. Only changes made
         * to this backing pmap are reflected in the guest; changes to the
         * host process's pmap do not affect it.
         */
        pmap_maybethreaded(&mach->vm->vm_pmap);
#endif

        /* Create the comm vmobj. */
        mach->commvmobj = os_vmobj_create(
            NVMM_MAX_VCPUS * NVMM_COMM_PAGE_SIZE);

        (*nvmm_impl->machine_create)(mach);

        args->machid = mach->machid;
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;
        size_t i;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        for (i = 0; i < NVMM_MAX_VCPUS; i++) {
                error = nvmm_vcpu_get(mach, i, &vcpu);
                if (error)
                        continue;

                (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                os_atomic_dec_uint(&mach->ncpus);
        }

        (*nvmm_impl->machine_destroy)(mach);

        /* Free the machine vmspace. */
        os_vmspace_destroy(mach->vm);

        /* Drop the kernel vmobj refs. */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                os_vmobj_rel(mach->hmap[i].vmobj);
        }

        nvmm_machine_free(mach);
        nvmm_machine_put(mach);

        return 0;
}

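/*
 * Machine and VCPU configuration follow the same pattern: the op indexes
 * the backend's conf-size table, the payload is copied in from userland
 * into a temporary buffer of that size, and the buffer is handed to the
 * backend.
 */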
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
        struct nvmm_machine *mach;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_MACH_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
                return EINVAL;
        }

        allocsz = nvmm_impl->mach_conf_sizes[op];
        data = os_mem_alloc(allocsz);

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error) {
                os_mem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
        nvmm_machine_put(mach);
        os_mem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        /* Map the comm page on the kernel side, as wired. */
        error = os_vmobj_map(os_kernel_map, (vaddr_t *)&vcpu->comm,
            NVMM_COMM_PAGE_SIZE, mach->commvmobj,
            args->cpuid * NVMM_COMM_PAGE_SIZE, true /* wired */,
            false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
            PROT_READ | PROT_WRITE);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        memset(vcpu->comm, 0, NVMM_COMM_PAGE_SIZE);

        /* Map the comm page on the user side, as pageable. */
        error = os_vmobj_map(os_curproc_map, (vaddr_t *)&args->comm,
            NVMM_COMM_PAGE_SIZE, mach->commvmobj,
            args->cpuid * NVMM_COMM_PAGE_SIZE, false /* !wired */,
            false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
            PROT_READ | PROT_WRITE);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        error = (*nvmm_impl->vcpu_create)(mach, vcpu);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        nvmm_vcpu_put(vcpu);
        os_atomic_inc_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
        nvmm_vcpu_free(mach, vcpu);
        nvmm_vcpu_put(vcpu);
        os_atomic_dec_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_VCPU_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
                return EINVAL;

        allocsz = nvmm_impl->vcpu_conf_sizes[op];
        data = os_mem_alloc(allocsz);

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error) {
                os_mem_free(data, allocsz);
                return error;
        }

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error) {
                nvmm_machine_put(mach);
                os_mem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
        nvmm_vcpu_put(vcpu);
        nvmm_machine_put(mach);
        os_mem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_setstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_getstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = (*nvmm_impl->vcpu_inject)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

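/*
 * Inner run loop. Nested page faults on guest RAM are serviced here in
 * the kernel via os_vmspace_fault() and the VCPU is resumed directly;
 * any other exit reason, a fault beyond the guest address range, or a
 * pending signal/reschedule returns control to userland.
 */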
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
        struct vmspace *vm = mach->vm;
        int ret;

        while (1) {
                /* Got a signal? Or pending resched? Leave. */
                if (__predict_false(os_return_needed())) {
                        exit->reason = NVMM_VCPU_EXIT_NONE;
                        return 0;
                }

                /* Run the VCPU. */
                ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
                if (__predict_false(ret != 0)) {
                        return ret;
                }

                /* Process nested page faults. */
                if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
                        break;
                }
                if (exit->u.mem.gpa >= mach->gpa_end) {
                        break;
                }
                if (os_vmspace_fault(vm, exit->u.mem.gpa, exit->u.mem.prot)) {
                        break;
                }
        }

        return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static os_vmobj_t *
nvmm_hmapping_getvmobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }
                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        *off = hva - hmapping->hva;
                        return hmapping->vmobj;
                }
        }

        return NULL;
}

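/*
 * Validate a host mapping request: page alignment, non-NULL hva, no
 * arithmetic overflow, and no partial overlap with an existing mapping.
 * A range falling entirely inside an existing mapping passes validation.
 */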
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;
        uintptr_t hva_end;
        uintptr_t hmap_end;

        if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
                return EINVAL;
        }
        if (hva == 0) {
                return EINVAL;
        }

        /*
         * Overflow tests MUST be done very carefully to prevent compiler
         * optimizations from effectively deleting the test.
         */
        hva_end = hva + size;
        if (hva_end <= hva)
                return EINVAL;

        /*
         * Overlap tests
         */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];

                if (!hmapping->present) {
                        continue;
                }
                hmap_end = hmapping->hva + hmapping->size;

                if (hva >= hmapping->hva && hva_end <= hmap_end)
                        break;
                if (hva >= hmapping->hva && hva < hmap_end)
                        return EEXIST;
                if (hva_end > hmapping->hva && hva_end <= hmap_end)
                        return EEXIST;
                if (hva <= hmapping->hva && hva_end >= hmap_end)
                        return EEXIST;
        }

        return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        hmapping->present = true;
                        return hmapping;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present || hmapping->hva != hva ||
                    hmapping->size != size) {
                        continue;
                }

                os_vmobj_unmap(os_curproc_map, hmapping->hva,
                    hmapping->hva + hmapping->size, false);
                os_vmobj_rel(hmapping->vmobj);

                hmapping->vmobj = NULL;
                hmapping->present = false;

                return 0;
        }

        return ENOENT;
}

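/*
 * Register a host mapping: reserve an hmapping slot, back it with a
 * fresh vmobj of the requested size, and map that vmobj at the given
 * fixed address in the calling process. nvmm_gpa_map() can later map
 * the same range into the guest.
 */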
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
        struct nvmm_machine *mach;
        struct nvmm_hmapping *hmapping;
        vaddr_t uva;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_validate(mach, args->hva, args->size);
        if (error)
                goto out;

        hmapping = nvmm_hmapping_alloc(mach);
        if (hmapping == NULL) {
                error = ENOBUFS;
                goto out;
        }

        hmapping->hva = args->hva;
        hmapping->size = args->size;
        hmapping->vmobj = os_vmobj_create(hmapping->size);
        uva = hmapping->hva;

        /* Map the vmobj into the user address space, as pageable. */
        error = os_vmobj_map(os_curproc_map, &uva, hmapping->size,
            hmapping->vmobj, 0, false /* !wired */, true /* fixed */,
            true /* shared */, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_free(mach, args->hva, args->size);

        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

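/*
 * Map guest physical memory: the hva range must fall inside a mapping
 * previously registered with nvmm_hva_map(); its backing vmobj (plus
 * offset) is then mapped at 'gpa' in the machine's vmspace, as pageable.
 */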
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
        struct nvmm_machine *mach;
        os_vmobj_t *vmobj;
        gpaddr_t gpa;
        gpaddr_t gpa_end;
        size_t off;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
                error = EINVAL;
                goto out;
        }

        /*
         * Overflow tests MUST be done very carefully to prevent compiler
         * optimizations from effectively deleting the test.
         */
        gpa = args->gpa;
        gpa_end = gpa + args->size;
        if (gpa_end <= gpa) {
                error = EINVAL;
                goto out;
        }

        if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
            (args->hva % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->hva == 0) {
                error = EINVAL;
                goto out;
        }

        if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (gpa_end > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }

        vmobj = nvmm_hmapping_getvmobj(mach, args->hva, args->size, &off);
        if (vmobj == NULL) {
                error = EINVAL;
                goto out;
        }

        /* Map the vmobj into the machine address space, as pageable. */
        error = os_vmobj_map(&mach->vm->vm_map, &gpa, args->size, vmobj, off,
            false /* !wired */, true /* fixed */, false /* !shared */,
            args->prot, PROT_READ | PROT_WRITE | PROT_EXEC);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
        struct nvmm_machine *mach;
        gpaddr_t gpa;
        gpaddr_t gpa_end;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        /*
         * Overflow tests MUST be done very carefully to prevent compiler
         * optimizations from effectively deleting the test.
         */
        gpa = args->gpa;
        gpa_end = gpa + args->size;
        if (gpa_end <= gpa) {
                error = EINVAL;
                goto out;
        }

        if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (gpa_end > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }

        /* Unmap the memory from the machine. */
        os_vmobj_unmap(&mach->vm->vm_map, gpa, gpa + args->size, false);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        struct nvmm_ctl_mach_info ctl;
        struct nvmm_machine *mach;
        int error;
        size_t i;

        if (args->size != sizeof(ctl))
                return EINVAL;
        error = copyin(args->data, &ctl, sizeof(ctl));
        if (error)
                return error;

        error = nvmm_machine_get(owner, ctl.machid, &mach, true);
        if (error)
                return error;

        ctl.nvcpus = mach->ncpus;

        ctl.nram = 0;
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                ctl.nram += mach->hmap[i].size;
        }

        ctl.pid = mach->owner->pid;
        ctl.time = mach->time;

        nvmm_machine_put(mach);

        error = copyout(&ctl, args->data, sizeof(ctl));
        if (error)
                return error;

        return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        switch (args->op) {
        case NVMM_CTL_MACH_INFO:
                return nvmm_ctl_mach_info(owner, args);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

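/*
 * Backend probing: the first implementation in nvmm_impl_list whose
 * ident() callback accepts the host CPU is selected at init time.
 */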
const struct nvmm_impl *
nvmm_ident(void)
{
        size_t i;

        for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
                if ((*nvmm_impl_list[i]->ident)())
                        return nvmm_impl_list[i];
        }

        return NULL;
}

int
nvmm_init(void)
{
        size_t i, n;

        nvmm_impl = nvmm_ident();
        if (nvmm_impl == NULL)
                return ENOTSUP;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                machines[i].machid = i;
                os_rwl_init(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        machines[i].cpus[n].present = false;
                        machines[i].cpus[n].cpuid = n;
                        os_mtx_init(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->init)();

        return 0;
}

void
nvmm_fini(void)
{
        size_t i, n;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                os_rwl_destroy(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        os_mtx_destroy(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->fini)();
        nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

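/*
 * Ioctl dispatcher, reached through the host-specific device front-end.
 * A typical call sequence from userland looks as follows (a sketch only;
 * consumers normally go through libnvmm, and the exact ioc structures
 * are defined in nvmm_ioctl.h):
 *
 *      NVMM_IOC_MACHINE_CREATE         create a machine, get a machid
 *      NVMM_IOC_VCPU_CREATE            create a VCPU, map its comm page
 *      NVMM_IOC_HVA_MAP                register host backing memory
 *      NVMM_IOC_GPA_MAP                expose it as guest physical memory
 *      NVMM_IOC_VCPU_RUN               run until an exit needs handling
 *      NVMM_IOC_MACHINE_DESTROY        tear everything down
 */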
int
nvmm_ioctl(struct nvmm_owner *owner, unsigned long cmd, void *data)
{
        switch (cmd) {
        case NVMM_IOC_CAPABILITY:
                return nvmm_capability(owner, data);
        case NVMM_IOC_MACHINE_CREATE:
                return nvmm_machine_create(owner, data);
        case NVMM_IOC_MACHINE_DESTROY:
                return nvmm_machine_destroy(owner, data);
        case NVMM_IOC_MACHINE_CONFIGURE:
                return nvmm_machine_configure(owner, data);
        case NVMM_IOC_VCPU_CREATE:
                return nvmm_vcpu_create(owner, data);
        case NVMM_IOC_VCPU_DESTROY:
                return nvmm_vcpu_destroy(owner, data);
        case NVMM_IOC_VCPU_CONFIGURE:
                return nvmm_vcpu_configure(owner, data);
        case NVMM_IOC_VCPU_SETSTATE:
                return nvmm_vcpu_setstate(owner, data);
        case NVMM_IOC_VCPU_GETSTATE:
                return nvmm_vcpu_getstate(owner, data);
        case NVMM_IOC_VCPU_INJECT:
                return nvmm_vcpu_inject(owner, data);
        case NVMM_IOC_VCPU_RUN:
                return nvmm_vcpu_run(owner, data);
        case NVMM_IOC_GPA_MAP:
                return nvmm_gpa_map(owner, data);
        case NVMM_IOC_GPA_UNMAP:
                return nvmm_gpa_unmap(owner, data);
        case NVMM_IOC_HVA_MAP:
                return nvmm_hva_map(owner, data);
        case NVMM_IOC_HVA_UNMAP:
                return nvmm_hva_unmap(owner, data);
        case NVMM_IOC_CTL:
                return nvmm_ctl(owner, data);
        default:
                return EINVAL;
        }
}