nvmm - Move *_fpu_enter/leave inside the cli/sti
sys/dev/virtual/nvmm/nvmm.c
/*      $NetBSD: nvmm.c,v 1.43 2021/04/12 09:22:58 mrg Exp $    */

/*
 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/devfs.h>
#include <sys/device.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/thread.h>

#include <dev/virtual/nvmm/nvmm_compat.h>
#include <dev/virtual/nvmm/nvmm.h>
#include <dev/virtual/nvmm/nvmm_internal.h>
#include <dev/virtual/nvmm/nvmm_ioctl.h>

MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data");

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
        &nvmm_x86_svm,  /* x86 AMD SVM */
        &nvmm_x86_vmx   /* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

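/*
 * Allocate a machine slot. Scans the fixed-size machines[] array for a
 * free entry. On success the machine is returned with its rwlock held
 * as writer; the caller releases it with nvmm_machine_put().
 */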
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
        struct nvmm_machine *mach;
        size_t i;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (mach->present) {
                        rw_exit(&mach->lock);
                        continue;
                }

                mach->present = true;
                mach->time = time_second;
                *ret = mach;
                atomic_inc_uint(&nmachines);
                return 0;
        }

        return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
        KASSERT(rw_write_held(&mach->lock));
        KASSERT(mach->present);
        mach->present = false;
        atomic_dec_uint(&nmachines);
}

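/*
 * Look up a machine by ID and lock it (reader or writer). Fails with
 * ENOENT if the slot is empty, and EPERM if the caller does not own the
 * machine; root_owner bypasses the ownership check.
 */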
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
        struct nvmm_machine *mach;
        krw_t op = writer ? RW_WRITER : RW_READER;

        if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
                return EINVAL;
        }
        mach = &machines[machid];

        rw_enter(&mach->lock, op);
        if (__predict_false(!mach->present)) {
                rw_exit(&mach->lock);
                return ENOENT;
        }
        if (__predict_false(mach->owner != owner && owner != &root_owner)) {
                rw_exit(&mach->lock);
                return EPERM;
        }
        *ret = mach;

        return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
        rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

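/*
 * Allocate a VCPU slot within a machine. As with machines, the VCPU is
 * returned with its mutex held; nvmm_vcpu_put() releases it.
 */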
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (cpuid >= NVMM_MAX_VCPUS) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (vcpu->present) {
                mutex_exit(&vcpu->lock);
                return EBUSY;
        }

        vcpu->present = true;
        vcpu->comm = NULL;
        vcpu->hcpu_last = -1;
        *ret = vcpu;
        return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
        KASSERT(mutex_owned(&vcpu->lock));
        vcpu->present = false;
        if (vcpu->comm != NULL) {
                uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
        }
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (__predict_false(!vcpu->present)) {
                mutex_exit(&vcpu->lock);
                return ENOENT;
        }
        *ret = vcpu;

        return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
        mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

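/*
 * Tear down every machine belonging to 'owner'. Called from nvmm_dtor()
 * when the owner's file descriptor goes away, so an exiting process
 * cannot leak machines, VCPUs, or the host mappings' UAO references.
 */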
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t i, j;
        int error;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (!mach->present || mach->owner != owner) {
                        rw_exit(&mach->lock);
                        continue;
                }

                /* Kill it. */
                for (j = 0; j < NVMM_MAX_VCPUS; j++) {
                        error = nvmm_vcpu_get(mach, j, &vcpu);
                        if (error)
                                continue;
                        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                        nvmm_vcpu_free(mach, vcpu);
                        nvmm_vcpu_put(vcpu);
                        atomic_dec_uint(&mach->ncpus);
                }
                (*nvmm_impl->machine_destroy)(mach);
                uvmspace_free(mach->vm);

                /* Drop the kernel UOBJ refs. */
                for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
                        if (!mach->hmap[j].present)
                                continue;
                        uao_detach(mach->hmap[j].uobj);
                }

                nvmm_machine_free(mach);

                rw_exit(&mach->lock);
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
        args->cap.version = NVMM_KERN_VERSION;
        args->cap.state_size = nvmm_impl->state_size;
        args->cap.max_machines = NVMM_MAX_MACHINES;
        args->cap.max_vcpus = NVMM_MAX_VCPUS;
        args->cap.max_ram = NVMM_MAX_RAM;

        (*nvmm_impl->capability)(&args->cap);

        return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_alloc(&mach);
        if (error)
                return error;

        /* Curproc owns the machine. */
        mach->owner = owner;

        /* Zero out the host mappings. */
        memset(&mach->hmap, 0, sizeof(mach->hmap));

        /* Create the machine vmspace. */
        mach->gpa_begin = 0;
        mach->gpa_end = NVMM_MAX_RAM;
        mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

        /* Create the comm uobj. */
        mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

        (*nvmm_impl->machine_create)(mach);

        args->machid = mach->machid;
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;
        size_t i;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        for (i = 0; i < NVMM_MAX_VCPUS; i++) {
                error = nvmm_vcpu_get(mach, i, &vcpu);
                if (error)
                        continue;

                (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                atomic_dec_uint(&mach->ncpus);
        }

        (*nvmm_impl->machine_destroy)(mach);

        /* Free the machine vmspace. */
        uvmspace_free(mach->vm);

        /* Drop the kernel UOBJ refs. */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                uao_detach(mach->hmap[i].uobj);
        }

        nvmm_machine_free(mach);
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
        struct nvmm_machine *mach;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_MACH_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
                return EINVAL;
        }

        allocsz = nvmm_impl->mach_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

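/*
 * Create a VCPU. The per-VCPU comm page is carved out of the machine's
 * comm uobj, mapped into the kernel at vcpu->comm, wired, and zeroed
 * before the backend's vcpu_create hook runs.
 */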
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        /* Allocate the comm page. */
        uao_reference(mach->commuobj);
        error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
            mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
            UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
        if (error) {
                uao_detach(mach->commuobj);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
            (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        memset(vcpu->comm, 0, PAGE_SIZE);

        error = (*nvmm_impl->vcpu_create)(mach, vcpu);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        nvmm_vcpu_put(vcpu);
        atomic_inc_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
        nvmm_vcpu_free(mach, vcpu);
        nvmm_vcpu_put(vcpu);
        atomic_dec_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_VCPU_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
                return EINVAL;

        allocsz = nvmm_impl->vcpu_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error) {
                nvmm_machine_put(mach);
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
        nvmm_vcpu_put(vcpu);
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_setstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_getstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = (*nvmm_impl->vcpu_inject)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

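/*
 * Inner run loop. Re-enters the guest until the backend reports an exit
 * that userland must handle. Nested page faults on a GPA inside the
 * machine's RAM window are resolved in-kernel with uvm_fault() and the
 * guest is resumed; signals and reschedule requests bail out with
 * NVMM_VCPU_EXIT_NONE so userland regains control.
 */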
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
        struct vmspace *vm = mach->vm;
        int ret;

        while (1) {
                /* Got a signal? Or pending resched? Leave. */
                if (__predict_false(nvmm_return_needed())) {
                        exit->reason = NVMM_VCPU_EXIT_NONE;
                        return 0;
                }

                /* Run the VCPU. */
                ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
                if (__predict_false(ret != 0)) {
                        return ret;
                }

                /* Process nested page faults. */
                if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
                        break;
                }
                if (exit->u.mem.gpa >= mach->gpa_end) {
                        break;
                }
                if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
                        break;
                }
        }

        return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }
                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        *off = hva - hmapping->hva;
                        return hmapping->uobj;
                }
        }

        return NULL;
}

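/*
 * Validate a host mapping candidate: the HVA and size must be page-
 * aligned and the HVA non-NULL. A range fully contained within an
 * existing mapping passes validation; any partial overlap with an
 * existing mapping is rejected with EEXIST.
 */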
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
                return EINVAL;
        }
        if (hva == 0) {
                return EINVAL;
        }

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }

                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        break;
                }

                if (hva >= hmapping->hva &&
                    hva < hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva + size > hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva <= hmapping->hva &&
                    hva + size >= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
        }

        return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        hmapping->present = true;
                        return hmapping;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present || hmapping->hva != hva ||
                    hmapping->size != size) {
                        continue;
                }

                uvm_unmap(&vmspace->vm_map, hmapping->hva,
                    hmapping->hva + hmapping->size);
                uao_detach(hmapping->uobj);

                hmapping->uobj = NULL;
                hmapping->present = false;

                return 0;
        }

        return ENOENT;
}

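/*
 * NVMM_IOC_HVA_MAP: back a range of the caller's address space with an
 * anonymous UVM object. The uobj keeps one reference for the machine
 * (dropped at teardown) plus one taken here for the user mapping.
 */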
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_machine *mach;
        struct nvmm_hmapping *hmapping;
        vaddr_t uva;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_validate(mach, args->hva, args->size);
        if (error)
                goto out;

        hmapping = nvmm_hmapping_alloc(mach);
        if (hmapping == NULL) {
                error = ENOBUFS;
                goto out;
        }

        hmapping->hva = args->hva;
        hmapping->size = args->size;
        hmapping->uobj = uao_create(hmapping->size, 0);
        uva = hmapping->hva;

        /* Take a reference for the user. */
        uao_reference(hmapping->uobj);

        /* Map the uobj into the user address space, as pageable. */
        error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
            0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(hmapping->uobj);
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_free(mach, args->hva, args->size);

        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

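/*
 * NVMM_IOC_GPA_MAP: map a range previously registered with HVA_MAP into
 * the guest physical address space. The GPA range must be page-aligned,
 * must not wrap, and must lie within [gpa_begin, gpa_end).
 */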
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
        struct nvmm_machine *mach;
        struct uvm_object *uobj;
        gpaddr_t gpa;
        size_t off;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
                error = EINVAL;
                goto out;
        }

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
            (args->hva % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->hva == 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
        if (uobj == NULL) {
                error = EINVAL;
                goto out;
        }

        /* Take a reference for the machine. */
        uao_reference(uobj);

        /* Map the uobj into the machine address space, as pageable. */
        error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
            UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(uobj);
                goto out;
        }
        if (gpa != args->gpa) {
                uao_detach(uobj);
                printf("[!] uvm_map problem\n");
                error = EINVAL;
                goto out;
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
        struct nvmm_machine *mach;
        gpaddr_t gpa;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        /* Unmap the memory from the machine. */
        uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        struct nvmm_ctl_mach_info ctl;
        struct nvmm_machine *mach;
        int error;
        size_t i;

        if (args->size != sizeof(ctl))
                return EINVAL;
        error = copyin(args->data, &ctl, sizeof(ctl));
        if (error)
                return error;

        error = nvmm_machine_get(owner, ctl.machid, &mach, true);
        if (error)
                return error;

        ctl.nvcpus = mach->ncpus;

        ctl.nram = 0;
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                ctl.nram += mach->hmap[i].size;
        }

        ctl.pid = mach->owner->pid;
        ctl.time = mach->time;

        nvmm_machine_put(mach);

        error = copyout(&ctl, args->data, sizeof(ctl));
        if (error)
                return error;

        return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        switch (args->op) {
        case NVMM_CTL_MACH_INFO:
                return nvmm_ctl_mach_info(owner, args);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

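/*
 * Pick the backend matching the host CPU: each entry's ident() probes
 * for the corresponding virtualization extension (SVM or VMX).
 */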
static const struct nvmm_impl *
nvmm_ident(void)
{
        size_t i;

        for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
                if ((*nvmm_impl_list[i]->ident)())
                        return nvmm_impl_list[i];
        }

        return NULL;
}

static int
nvmm_init(void)
{
        size_t i, n;

        nvmm_impl = nvmm_ident();
        if (nvmm_impl == NULL)
                return ENOTSUP;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                machines[i].machid = i;
                rw_init(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        machines[i].cpus[n].present = false;
                        machines[i].cpus[n].cpuid = n;
                        mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
                            IPL_NONE);
                }
        }

        (*nvmm_impl->init)();

        return 0;
}

static void
nvmm_fini(void)
{
        size_t i, n;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                rw_destroy(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        mutex_destroy(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->fini)();
        nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static d_open_t nvmm_open;
static d_ioctl_t nvmm_ioctl;
static d_mmap_single_t nvmm_mmap_single;
static d_priv_dtor_t nvmm_dtor;

static struct dev_ops nvmm_ops = {
        { "nvmm", 0, D_MPSAFE },
        .d_open         = nvmm_open,
        .d_ioctl        = nvmm_ioctl,
        .d_mmap_single  = nvmm_mmap_single,
};

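/*
 * Open /dev/nvmm. O_CLOEXEC is mandatory, so the descriptor cannot leak
 * across exec. Root gets the shared root_owner (which may access any
 * machine); other users get a per-open owner tied to their PID, attached
 * to the file via cdevpriv and destroyed by nvmm_dtor() on last close.
 */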
static int
nvmm_open(struct dev_open_args *ap)
{
        int flags = ap->a_oflags;
        struct nvmm_owner *owner;
        struct file *fp;
        int error;

        if (__predict_false(nvmm_impl == NULL))
                return ENXIO;
        if (!(flags & O_CLOEXEC))
                return EINVAL;

        if (priv_check_cred(ap->a_cred, PRIV_ROOT, 0) == 0) {
                owner = &root_owner;
        } else {
                owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
                owner->pid = curthread->td_proc->p_pid;
        }

        fp = ap->a_fpp ? *ap->a_fpp : NULL;
        error = devfs_set_cdevpriv(fp, owner, nvmm_dtor);
        if (error) {
                nvmm_dtor(owner);
                return error;
        }

        return 0;
}

static void
nvmm_dtor(void *arg)
{
        struct nvmm_owner *owner = arg;

        KASSERT(owner != NULL);
        nvmm_kill_machines(owner);
        if (owner != &root_owner) {
                kmem_free(owner, sizeof(*owner));
        }
}

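/*
 * mmap() on /dev/nvmm exposes a VCPU comm page to userland. The machine
 * and VCPU IDs are encoded in the file offset (NVMM_COMM_MACHID and
 * NVMM_COMM_CPUID); the offset is rewritten so the caller maps the
 * right page of the machine's comm uobj.
 */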
static int
nvmm_mmap_single(struct dev_mmap_single_args *ap)
{
        vm_ooffset_t *offp = ap->a_offset;
        size_t size = ap->a_size;
        int prot = ap->a_nprot;
        struct vm_object **uobjp = ap->a_object;
        struct file *fp = ap->a_fp;
        struct nvmm_owner *owner = NULL;
        struct nvmm_machine *mach;
        nvmm_machid_t machid;
        nvmm_cpuid_t cpuid;
        int error;

        devfs_get_cdevpriv(fp, (void **)&owner);
        KASSERT(owner != NULL);

        if (prot & PROT_EXEC)
                return EACCES;
        if (size != PAGE_SIZE)
                return EINVAL;

        cpuid = NVMM_COMM_CPUID(*offp);
        if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
                return EINVAL;

        machid = NVMM_COMM_MACHID(*offp);
        error = nvmm_machine_get(owner, machid, &mach, false);
        if (error)
                return error;

        uao_reference(mach->commuobj);
        *uobjp = mach->commuobj;
        *offp = cpuid * PAGE_SIZE;

        nvmm_machine_put(mach);
        return 0;
}

static int
nvmm_ioctl(struct dev_ioctl_args *ap)
{
        unsigned long cmd = ap->a_cmd;
        void *data = ap->a_data;
        struct file *fp = ap->a_fp;
        struct nvmm_owner *owner = NULL;

        devfs_get_cdevpriv(fp, (void **)&owner);
        KASSERT(owner != NULL);

        switch (cmd) {
        case NVMM_IOC_CAPABILITY:
                return nvmm_capability(owner, data);
        case NVMM_IOC_MACHINE_CREATE:
                return nvmm_machine_create(owner, data);
        case NVMM_IOC_MACHINE_DESTROY:
                return nvmm_machine_destroy(owner, data);
        case NVMM_IOC_MACHINE_CONFIGURE:
                return nvmm_machine_configure(owner, data);
        case NVMM_IOC_VCPU_CREATE:
                return nvmm_vcpu_create(owner, data);
        case NVMM_IOC_VCPU_DESTROY:
                return nvmm_vcpu_destroy(owner, data);
        case NVMM_IOC_VCPU_CONFIGURE:
                return nvmm_vcpu_configure(owner, data);
        case NVMM_IOC_VCPU_SETSTATE:
                return nvmm_vcpu_setstate(owner, data);
        case NVMM_IOC_VCPU_GETSTATE:
                return nvmm_vcpu_getstate(owner, data);
        case NVMM_IOC_VCPU_INJECT:
                return nvmm_vcpu_inject(owner, data);
        case NVMM_IOC_VCPU_RUN:
                return nvmm_vcpu_run(owner, data);
        case NVMM_IOC_GPA_MAP:
                return nvmm_gpa_map(owner, data);
        case NVMM_IOC_GPA_UNMAP:
                return nvmm_gpa_unmap(owner, data);
        case NVMM_IOC_HVA_MAP:
                return nvmm_hva_map(owner, data);
        case NVMM_IOC_HVA_UNMAP:
                return nvmm_hva_unmap(owner, data);
        case NVMM_IOC_CTL:
                return nvmm_ctl(owner, data);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_attach(void)
{
        int error;

        error = nvmm_init();
        if (error)
                panic("%s: impossible", __func__);
        printf("nvmm: attached, using backend %s\n", nvmm_impl->name);

        return 0;
}

static int
nvmm_detach(void)
{
        if (atomic_load_acq_int(&nmachines) > 0)
                return EBUSY;

        nvmm_fini();
        return 0;
}

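/*
 * Module lifecycle. MOD_LOAD probes the CPU, initializes the backend,
 * and creates /dev/nvmm (root:nvmm, mode 0660); MOD_UNLOAD refuses to
 * detach while any machine is still alive.
 */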
static int
nvmm_modevent(module_t mod __unused, int type, void *data __unused)
{
        static cdev_t dev = NULL;
        int error;

        switch (type) {
        case MOD_LOAD:
                if (nvmm_ident() == NULL) {
                        printf("nvmm: cpu not supported\n");
                        return ENOTSUP;
                }
                error = nvmm_attach();
                if (error)
                        return error;

                dev = make_dev(&nvmm_ops, 0, UID_ROOT, GID_NVMM, 0660, "nvmm");
                if (dev == NULL) {
                        printf("nvmm: unable to create device\n");
                        error = ENOMEM;
                }
                break;

        case MOD_UNLOAD:
                if (dev == NULL)
                        return 0;
                error = nvmm_detach();
                if (error == 0)
                        destroy_dev(dev);
                break;

        case MOD_SHUTDOWN:
                error = 0;
                break;

        default:
                error = EOPNOTSUPP;
                break;
        }

        return error;
}

static moduledata_t nvmm_moddata = {
        .name = "nvmm",
        .evhand = nvmm_modevent,
        .priv = NULL,
};

DECLARE_MODULE(nvmm, nvmm_moddata, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(nvmm, NVMM_KERN_VERSION);