/*
 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/mman.h>

#include "nvmm.h"
#include "nvmm_internal.h"
#include "nvmm_ioctl.h"

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
        &nvmm_x86_svm,  /* x86 AMD SVM */
        &nvmm_x86_vmx   /* x86 Intel VMX */
#endif
};

const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

struct nvmm_owner nvmm_root_owner;
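
/*
 * Global state: machines[] is a fixed-size table of VM slots, each protected
 * by its own rwlock, and nmachines counts the slots currently in use.
 * nvmm_impl points to the backend (SVM or VMX) selected at init time, and
 * nvmm_root_owner is the special owner that is allowed to access every
 * machine regardless of who created it.
 */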

/* -------------------------------------------------------------------------- */

static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
        struct nvmm_machine *mach;
        size_t i;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                os_rwl_wlock(&mach->lock);
                if (mach->present) {
                        os_rwl_unlock(&mach->lock);
                        continue;
                }

                mach->present = true;
                mach->time = time_second;
                *ret = mach;
                os_atomic_inc_uint(&nmachines);
                return 0;
        }

        return ENOBUFS;
}
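
/*
 * Note the lock protocol: nvmm_machine_alloc() returns with the machine
 * write-locked, and nvmm_machine_get() returns with it read- or write-locked
 * depending on the 'writer' argument. Both are paired with
 * nvmm_machine_put(), which drops the lock.
 */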

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
        OS_ASSERT(os_rwl_wheld(&mach->lock));
        OS_ASSERT(mach->present);
        mach->present = false;
        os_atomic_dec_uint(&nmachines);
}

static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
        struct nvmm_machine *mach;

        if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
                return EINVAL;
        }
        mach = &machines[machid];

        if (__predict_false(writer)) {
                os_rwl_wlock(&mach->lock);
        } else {
                os_rwl_rlock(&mach->lock);
        }
        if (__predict_false(!mach->present)) {
                os_rwl_unlock(&mach->lock);
                return ENOENT;
        }
        if (__predict_false(mach->owner != owner &&
                            owner != &nvmm_root_owner)) {
                os_rwl_unlock(&mach->lock);
                return EPERM;
        }
        *ret = mach;

        return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
        os_rwl_unlock(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (cpuid >= NVMM_MAX_VCPUS) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        os_mtx_lock(&vcpu->lock);
        if (vcpu->present) {
                os_mtx_unlock(&vcpu->lock);
                return EBUSY;
        }

        vcpu->present = true;
        vcpu->comm = NULL;
        vcpu->hcpu_last = -1;
        *ret = vcpu;
        return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
        OS_ASSERT(os_mtx_owned(&vcpu->lock));
        vcpu->present = false;
        if (vcpu->comm != NULL) {
                os_vmobj_unmap(os_kernel_map, (vaddr_t)vcpu->comm,
                    (vaddr_t)vcpu->comm + NVMM_COMM_PAGE_SIZE, true);
                /*
                 * Require userland to unmap the comm page from its address
                 * space, because os_curproc_map at this point (fd close)
                 * is not guaranteed to be the correct address space.
                 */
        }
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        os_mtx_lock(&vcpu->lock);
        if (__predict_false(!vcpu->present)) {
                os_mtx_unlock(&vcpu->lock);
                return ENOENT;
        }
        *ret = vcpu;

        return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
        os_mtx_unlock(&vcpu->lock);
}
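
/*
 * The same protocol applies at the VCPU level: nvmm_vcpu_alloc() and
 * nvmm_vcpu_get() both return with the per-VCPU mutex held, and
 * nvmm_vcpu_put() releases it. A VCPU is always manipulated under both the
 * machine lock (read or write) and its own mutex.
 */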

/* -------------------------------------------------------------------------- */

void
nvmm_kill_machines(struct nvmm_owner *owner)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t i, j;
        int error;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                os_rwl_wlock(&mach->lock);
                if (!mach->present || mach->owner != owner) {
                        os_rwl_unlock(&mach->lock);
                        continue;
                }

                /* Kill it. */
                for (j = 0; j < NVMM_MAX_VCPUS; j++) {
                        error = nvmm_vcpu_get(mach, j, &vcpu);
                        if (error)
                                continue;
                        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                        nvmm_vcpu_free(mach, vcpu);
                        nvmm_vcpu_put(vcpu);
                        os_atomic_dec_uint(&mach->ncpus);
                }
                (*nvmm_impl->machine_destroy)(mach);
                os_vmspace_destroy(mach->vm);

                /* Drop the kernel vmobj refs. */
                for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
                        if (!mach->hmap[j].present)
                                continue;
                        os_vmobj_rel(mach->hmap[j].vmobj);
                }

                nvmm_machine_free(mach);

                os_rwl_unlock(&mach->lock);
        }
}
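
/*
 * nvmm_kill_machines() reaps every machine belonging to 'owner', following
 * the same teardown sequence as NVMM_IOC_MACHINE_DESTROY. The OS-specific
 * glue is expected to call it when an owner goes away (typically when the
 * nvmm device file descriptor is closed), so that no VM outlives its
 * creator.
 */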

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
        args->cap.version = NVMM_KERN_VERSION;
        args->cap.state_size = nvmm_impl->state_size;
        args->cap.comm_size = NVMM_COMM_PAGE_SIZE;
        args->cap.max_machines = NVMM_MAX_MACHINES;
        args->cap.max_vcpus = NVMM_MAX_VCPUS;
        args->cap.max_ram = NVMM_MAX_RAM;

        (*nvmm_impl->capability)(&args->cap);

        return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_alloc(&mach);
        if (error)
                return error;

        /* Curproc owns the machine. */
        mach->owner = owner;

        /* Zero out the host mappings. */
        memset(&mach->hmap, 0, sizeof(mach->hmap));

        /* Create the machine vmspace. */
        mach->gpa_begin = 0;
        mach->gpa_end = NVMM_MAX_RAM;
        mach->vm = os_vmspace_create(mach->gpa_begin, mach->gpa_end);

        /* Create the comm vmobj. */
        mach->commvmobj = os_vmobj_create(
            NVMM_MAX_VCPUS * NVMM_COMM_PAGE_SIZE);

        (*nvmm_impl->machine_create)(mach);

        args->machid = mach->machid;
        nvmm_machine_put(mach);

        return 0;
}
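
/*
 * For reference, a hypothetical userland sequence against this ioctl
 * interface (in practice the libnvmm library wraps these calls; the device
 * path and flags shown here are assumptions for illustration):
 *
 *	struct nvmm_ioc_machine_create mc;
 *	struct nvmm_ioc_vcpu_create vc;
 *	int fd = open("/dev/nvmm", O_RDWR);
 *
 *	ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);	// mc.machid filled in
 *	vc.machid = mc.machid;
 *	vc.cpuid = 0;
 *	ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);		// vc.comm points at
 *							// the shared comm page
 */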

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;
        size_t i;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        for (i = 0; i < NVMM_MAX_VCPUS; i++) {
                error = nvmm_vcpu_get(mach, i, &vcpu);
                if (error)
                        continue;

                (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                os_atomic_dec_uint(&mach->ncpus);
        }

        (*nvmm_impl->machine_destroy)(mach);

        /* Free the machine vmspace. */
        os_vmspace_destroy(mach->vm);

        /* Drop the kernel vmobj refs. */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                os_vmobj_rel(mach->hmap[i].vmobj);
        }

        nvmm_machine_free(mach);
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
        struct nvmm_machine *mach;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_MACH_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
                return EINVAL;
        }

        allocsz = nvmm_impl->mach_conf_sizes[op];
        data = os_mem_alloc(allocsz);

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error) {
                os_mem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
        nvmm_machine_put(mach);
        os_mem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        /* Map the comm page on the kernel side, as wired. */
        error = os_vmobj_map(os_kernel_map, (vaddr_t *)&vcpu->comm,
            NVMM_COMM_PAGE_SIZE, mach->commvmobj,
            args->cpuid * NVMM_COMM_PAGE_SIZE, true /* wired */,
            false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
            PROT_READ | PROT_WRITE);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        memset(vcpu->comm, 0, NVMM_COMM_PAGE_SIZE);

        /* Map the comm page on the user side, as pageable. */
        error = os_vmobj_map(os_curproc_map, (vaddr_t *)&args->comm,
            NVMM_COMM_PAGE_SIZE, mach->commvmobj,
            args->cpuid * NVMM_COMM_PAGE_SIZE, false /* !wired */,
            false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
            PROT_READ | PROT_WRITE);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        error = (*nvmm_impl->vcpu_create)(mach, vcpu);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        nvmm_vcpu_put(vcpu);
        os_atomic_inc_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
        nvmm_vcpu_free(mach, vcpu);
        nvmm_vcpu_put(vcpu);
        os_atomic_dec_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_VCPU_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
                return EINVAL;

        allocsz = nvmm_impl->vcpu_conf_sizes[op];
        data = os_mem_alloc(allocsz);

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error) {
                os_mem_free(data, allocsz);
                return error;
        }

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error) {
                nvmm_machine_put(mach);
                os_mem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
        nvmm_vcpu_put(vcpu);
        nvmm_machine_put(mach);
        os_mem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_setstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_getstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = (*nvmm_impl->vcpu_inject)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
        struct vmspace *vm = mach->vm;
        int ret;

        while (1) {
                /* Got a signal? Or pending resched? Leave. */
                if (__predict_false(os_return_needed())) {
                        exit->reason = NVMM_VCPU_EXIT_NONE;
                        return 0;
                }

                /* Run the VCPU. */
                ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
                if (__predict_false(ret != 0)) {
                        return ret;
                }

                /* Process nested page faults. */
                if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
                        break;
                }
                if (exit->u.mem.gpa >= mach->gpa_end) {
                        break;
                }
                if (os_vmspace_fault(vm, exit->u.mem.gpa, exit->u.mem.prot)) {
                        break;
                }
        }

        return 0;
}
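
/*
 * The loop above keeps the guest running across nested page faults that the
 * host can resolve: when the backend reports NVMM_VCPU_EXIT_MEMORY for a GPA
 * that lies inside the machine's address space and os_vmspace_fault() can
 * satisfy it, we simply re-enter the guest. Userland therefore only sees
 * MEMORY exits for GPAs that are genuinely unmapped (typically MMIO), and
 * plain NVMM_VCPU_EXIT_NONE when a signal or a reschedule is pending.
 */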

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static os_vmobj_t *
nvmm_hmapping_getvmobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }
                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        *off = hva - hmapping->hva;
                        return hmapping->vmobj;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;
        uintptr_t hva_end;
        uintptr_t hmap_end;

        if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
                return EINVAL;
        }
        if (hva == 0) {
                return EINVAL;
        }

        /*
         * Overflow tests MUST be done very carefully to avoid compiler
         * optimizations from effectively deleting the test.
         */
        hva_end = hva + size;
        if (hva_end <= hva)
                return EINVAL;

        /*
         * Overlap tests
         */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];

                if (!hmapping->present) {
                        continue;
                }
                hmap_end = hmapping->hva + hmapping->size;

                if (hva >= hmapping->hva && hva_end <= hmap_end)
                        break;
                if (hva >= hmapping->hva && hva < hmap_end)
                        return EEXIST;
                if (hva_end > hmapping->hva && hva_end <= hmap_end)
                        return EEXIST;
                if (hva <= hmapping->hva && hva_end >= hmap_end)
                        return EEXIST;
        }

        return 0;
}
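
/*
 * Worked example of the overflow test above, on a 64-bit host: with
 * hva = 0xfffffffffffff000 and size = 0x2000, the addition wraps around to
 * hva_end = 0x1000, so "hva_end <= hva" holds and the range is rejected
 * with EINVAL before any overlap test can be fooled by the wrapped value.
 */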

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        hmapping->present = true;
                        return hmapping;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present || hmapping->hva != hva ||
                    hmapping->size != size) {
                        continue;
                }

                os_vmobj_unmap(os_curproc_map, hmapping->hva,
                    hmapping->hva + hmapping->size, false);
                os_vmobj_rel(hmapping->vmobj);

                hmapping->vmobj = NULL;
                hmapping->present = false;

                return 0;
        }

        return ENOENT;
}

static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
        struct nvmm_machine *mach;
        struct nvmm_hmapping *hmapping;
        vaddr_t uva;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_validate(mach, args->hva, args->size);
        if (error)
                goto out;

        hmapping = nvmm_hmapping_alloc(mach);
        if (hmapping == NULL) {
                error = ENOBUFS;
                goto out;
        }

        hmapping->hva = args->hva;
        hmapping->size = args->size;
        hmapping->vmobj = os_vmobj_create(hmapping->size);
        uva = hmapping->hva;

        /* Map the vmobj into the user address space, as pageable. */
        error = os_vmobj_map(os_curproc_map, &uva, hmapping->size,
            hmapping->vmobj, 0, false /* !wired */, true /* fixed */,
            true /* shared */, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_free(mach, args->hva, args->size);

        nvmm_machine_put(mach);
        return error;
}
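
/*
 * Guest memory is thus set up in two steps. NVMM_IOC_HVA_MAP backs a range
 * of the emulator's address space with a kernel vmobj, and NVMM_IOC_GPA_MAP
 * below maps (part of) that same vmobj into the guest physical address
 * space. Both sides share the pages, so the emulator reads and writes guest
 * RAM directly through the HVA window.
 */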

/* -------------------------------------------------------------------------- */

static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
        struct nvmm_machine *mach;
        os_vmobj_t *vmobj;
        gpaddr_t gpa;
        gpaddr_t gpa_end;
        size_t off;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
                error = EINVAL;
                goto out;
        }

        /*
         * Overflow tests MUST be done very carefully to avoid compiler
         * optimizations from effectively deleting the test.
         */
        gpa = args->gpa;
        gpa_end = gpa + args->size;
        if (gpa_end <= gpa) {
                error = EINVAL;
                goto out;
        }

        if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
            (args->hva % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->hva == 0) {
                error = EINVAL;
                goto out;
        }

        if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (gpa_end > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }

        vmobj = nvmm_hmapping_getvmobj(mach, args->hva, args->size, &off);
        if (vmobj == NULL) {
                error = EINVAL;
                goto out;
        }

        /* Map the vmobj into the machine address space, as pageable. */
        error = os_vmobj_map(&mach->vm->vm_map, &gpa, args->size, vmobj, off,
            false /* !wired */, true /* fixed */, false /* !shared */,
            args->prot, PROT_READ | PROT_WRITE | PROT_EXEC);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
        struct nvmm_machine *mach;
        gpaddr_t gpa;
        gpaddr_t gpa_end;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        /*
         * Overflow tests MUST be done very carefully to avoid compiler
         * optimizations from effectively deleting the test.
         */
        gpa = args->gpa;
        gpa_end = gpa + args->size;
        if (gpa_end <= gpa) {
                error = EINVAL;
                goto out;
        }

        if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        /*
         * Allow the range to extend up to gpa_end exactly, mirroring the
         * bound check in nvmm_gpa_map() above.
         */
        if (gpa_end > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }

        /* Unmap the memory from the machine. */
        os_vmobj_unmap(&mach->vm->vm_map, gpa, gpa + args->size, false);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        struct nvmm_ctl_mach_info ctl;
        struct nvmm_machine *mach;
        int error;
        size_t i;

        if (args->size != sizeof(ctl))
                return EINVAL;
        error = copyin(args->data, &ctl, sizeof(ctl));
        if (error)
                return error;

        error = nvmm_machine_get(owner, ctl.machid, &mach, true);
        if (error)
                return error;

        ctl.nvcpus = mach->ncpus;

        ctl.nram = 0;
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                ctl.nram += mach->hmap[i].size;
        }

        ctl.pid = mach->owner->pid;
        ctl.time = mach->time;

        nvmm_machine_put(mach);

        error = copyout(&ctl, args->data, sizeof(ctl));
        if (error)
                return error;

        return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        switch (args->op) {
        case NVMM_CTL_MACH_INFO:
                return nvmm_ctl_mach_info(owner, args);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

const struct nvmm_impl *
nvmm_ident(void)
{
        size_t i;

        for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
                if ((*nvmm_impl_list[i]->ident)())
                        return nvmm_impl_list[i];
        }

        return NULL;
}
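
/*
 * nvmm_ident() probes the compiled-in backends in the order they appear in
 * nvmm_impl_list[] (on x86-64: AMD SVM first, then Intel VMX) and returns
 * the first one whose ident() hook reports usable hardware support; NULL
 * means no backend matched, and nvmm_init() then fails with ENOTSUP.
 */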

int
nvmm_init(void)
{
        size_t i, n;

        nvmm_impl = nvmm_ident();
        if (nvmm_impl == NULL)
                return ENOTSUP;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                machines[i].machid = i;
                os_rwl_init(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        machines[i].cpus[n].present = false;
                        machines[i].cpus[n].cpuid = n;
                        os_mtx_init(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->init)();

        return 0;
}

void
nvmm_fini(void)
{
        size_t i, n;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                os_rwl_destroy(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        os_mtx_destroy(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->fini)();
        nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

int
nvmm_ioctl(struct nvmm_owner *owner, unsigned long cmd, void *data)
{
        switch (cmd) {
        case NVMM_IOC_CAPABILITY:
                return nvmm_capability(owner, data);
        case NVMM_IOC_MACHINE_CREATE:
                return nvmm_machine_create(owner, data);
        case NVMM_IOC_MACHINE_DESTROY:
                return nvmm_machine_destroy(owner, data);
        case NVMM_IOC_MACHINE_CONFIGURE:
                return nvmm_machine_configure(owner, data);
        case NVMM_IOC_VCPU_CREATE:
                return nvmm_vcpu_create(owner, data);
        case NVMM_IOC_VCPU_DESTROY:
                return nvmm_vcpu_destroy(owner, data);
        case NVMM_IOC_VCPU_CONFIGURE:
                return nvmm_vcpu_configure(owner, data);
        case NVMM_IOC_VCPU_SETSTATE:
                return nvmm_vcpu_setstate(owner, data);
        case NVMM_IOC_VCPU_GETSTATE:
                return nvmm_vcpu_getstate(owner, data);
        case NVMM_IOC_VCPU_INJECT:
                return nvmm_vcpu_inject(owner, data);
        case NVMM_IOC_VCPU_RUN:
                return nvmm_vcpu_run(owner, data);
        case NVMM_IOC_GPA_MAP:
                return nvmm_gpa_map(owner, data);
        case NVMM_IOC_GPA_UNMAP:
                return nvmm_gpa_unmap(owner, data);
        case NVMM_IOC_HVA_MAP:
                return nvmm_hva_map(owner, data);
        case NVMM_IOC_HVA_UNMAP:
                return nvmm_hva_unmap(owner, data);
        case NVMM_IOC_CTL:
                return nvmm_ctl(owner, data);
        default:
                return EINVAL;
        }
}
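
/*
 * Putting it together, a hypothetical minimal VM lifecycle driven through
 * nvmm_ioctl() (error handling elided; the field values are illustrative
 * assumptions, not values mandated by this file):
 *
 *	NVMM_IOC_MACHINE_CREATE			// get a machid
 *	NVMM_IOC_HVA_MAP    { hva, size }	// back guest RAM in the host
 *	NVMM_IOC_GPA_MAP    { gpa=0, hva, size, prot=RWX }
 *	NVMM_IOC_VCPU_CREATE  { cpuid=0 }	// comm page mapped back to us
 *	NVMM_IOC_VCPU_SETSTATE			// initial register state
 *	loop: NVMM_IOC_VCPU_RUN, handle args->exit
 *	NVMM_IOC_MACHINE_DESTROY		// or just close the fd
 */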