nvmm: Port to DragonFly #9: atomic operations
[dragonfly.git] / sys / dev / virtual / nvmm / nvmm.c
/*      $NetBSD: nvmm.c,v 1.22.2.7 2020/08/29 17:00:28 martin Exp $     */

/*
 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.22.2.7 2020/08/29 17:00:28 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/device.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include "ioconf.h"

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/nvmm_ioctl.h>

MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data");

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
        &nvmm_x86_svm,  /* x86 AMD SVM */
        &nvmm_x86_vmx   /* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

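/*
 * Machine slots. machines[] is a static table of NVMM_MAX_MACHINES
 * entries, and allocation is a linear scan for a slot that is not
 * marked present. Locking contract: on success the slot is returned
 * with its rwlock held as writer; the caller drops it later with
 * nvmm_machine_put().
 */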
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
        struct nvmm_machine *mach;
        size_t i;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (mach->present) {
                        rw_exit(&mach->lock);
                        continue;
                }

                mach->present = true;
                mach->time = time_second;
                *ret = mach;
                atomic_inc_uint(&nmachines);
                return 0;
        }

        return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
        KASSERT(rw_write_held(&mach->lock));
        KASSERT(mach->present);
        mach->present = false;
        atomic_dec_uint(&nmachines);
}

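/*
 * Look up a machine by ID and lock it, as reader or writer. Only the
 * owner that created a machine may get it, with one exception: the
 * global root_owner (handed out by nvmm_open() for write-only opens)
 * bypasses the ownership check and can reach any machine.
 */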
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
        struct nvmm_machine *mach;
        krw_t op = writer ? RW_WRITER : RW_READER;

        if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
                return EINVAL;
        }
        mach = &machines[machid];

        rw_enter(&mach->lock, op);
        if (__predict_false(!mach->present)) {
                rw_exit(&mach->lock);
                return ENOENT;
        }
        if (__predict_false(mach->owner != owner && owner != &root_owner)) {
                rw_exit(&mach->lock);
                return EPERM;
        }
        *ret = mach;

        return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
        rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (cpuid >= NVMM_MAX_VCPUS) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (vcpu->present) {
                mutex_exit(&vcpu->lock);
                return EBUSY;
        }

        vcpu->present = true;
        vcpu->comm = NULL;
        vcpu->hcpu_last = -1;
        *ret = vcpu;
        return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
        KASSERT(mutex_owned(&vcpu->lock));
        vcpu->present = false;
        if (vcpu->comm != NULL) {
                uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
        }
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (__predict_false(!vcpu->present)) {
                mutex_exit(&vcpu->lock);
                return ENOENT;
        }
        *ret = vcpu;

        return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
        mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

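/*
 * Destroy every machine belonging to the given owner. This is the
 * cleanup path invoked from nvmm_close(), so machines cannot outlive
 * the descriptor that created them; the per-machine teardown mirrors
 * nvmm_machine_destroy().
 */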
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t i, j;
        int error;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (!mach->present || mach->owner != owner) {
                        rw_exit(&mach->lock);
                        continue;
                }

                /* Kill it. */
                for (j = 0; j < NVMM_MAX_VCPUS; j++) {
                        error = nvmm_vcpu_get(mach, j, &vcpu);
                        if (error)
                                continue;
                        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                        nvmm_vcpu_free(mach, vcpu);
                        nvmm_vcpu_put(vcpu);
                        atomic_dec_uint(&mach->ncpus);
                }
                (*nvmm_impl->machine_destroy)(mach);
                uvmspace_free(mach->vm);

                /* Drop the kernel UOBJ refs. */
                for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
                        if (!mach->hmap[j].present)
                                continue;
                        uao_detach(mach->hmap[j].uobj);
                }

                nvmm_machine_free(mach);

                rw_exit(&mach->lock);
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
        args->cap.version = NVMM_KERN_VERSION;
        args->cap.state_size = nvmm_impl->state_size;
        args->cap.max_machines = NVMM_MAX_MACHINES;
        args->cap.max_vcpus = NVMM_MAX_VCPUS;
        args->cap.max_ram = NVMM_MAX_RAM;

        (*nvmm_impl->capability)(&args->cap);

        return 0;
}

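/*
 * Create a machine: reserve a slot, give it a dedicated vmspace covering
 * the guest-physical range [gpa_begin, gpa_end), and create the "comm"
 * anonymous UVM object, one page per possible VCPU. Each comm page gets
 * double-mapped later: into the kernel in nvmm_vcpu_create(), and into
 * the emulator process via nvmm_mmap().
 */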
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_alloc(&mach);
        if (error)
                return error;

        /* Curproc owns the machine. */
        mach->owner = owner;

        /* Zero out the host mappings. */
        memset(&mach->hmap, 0, sizeof(mach->hmap));

        /* Create the machine vmspace. */
        mach->gpa_begin = 0;
        mach->gpa_end = NVMM_MAX_RAM;
        mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

        /* Create the comm uobj. */
        mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

        (*nvmm_impl->machine_create)(mach);

        args->machid = mach->machid;
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;
        size_t i;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        for (i = 0; i < NVMM_MAX_VCPUS; i++) {
                error = nvmm_vcpu_get(mach, i, &vcpu);
                if (error)
                        continue;

                (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                atomic_dec_uint(&mach->ncpus);
        }

        (*nvmm_impl->machine_destroy)(mach);

        /* Free the machine vmspace. */
        uvmspace_free(mach->vm);

        /* Drop the kernel UOBJ refs. */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                uao_detach(mach->hmap[i].uobj);
        }

        nvmm_machine_free(mach);
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
        struct nvmm_machine *mach;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_MACH_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
                return EINVAL;
        }

        allocsz = nvmm_impl->mach_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

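/*
 * Create a VCPU. Its comm page is the args->cpuid'th page of the
 * machine's comm uobj, mapped into the kernel here and then wired with
 * uvm_map_pageable() -- presumably so the backend can access it from
 * contexts where taking a page fault would be unacceptable.
 */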
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        /* Allocate the comm page. */
        uao_reference(mach->commuobj);
        error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
            mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
            UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
        if (error) {
                uao_detach(mach->commuobj);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
            (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        memset(vcpu->comm, 0, PAGE_SIZE);

        error = (*nvmm_impl->vcpu_create)(mach, vcpu);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        nvmm_vcpu_put(vcpu);
        atomic_inc_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
        nvmm_vcpu_free(mach, vcpu);
        nvmm_vcpu_put(vcpu);
        atomic_dec_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_VCPU_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
                return EINVAL;

        allocsz = nvmm_impl->vcpu_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error) {
                nvmm_machine_put(mach);
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
        nvmm_vcpu_put(vcpu);
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_setstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_getstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = (*nvmm_impl->vcpu_inject)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

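/*
 * The VCPU run loop. Each iteration enters the backend; on return,
 * guest memory faults (NVMM_VCPU_EXIT_MEMORY) that fall inside the
 * managed GPA range are resolved in-kernel with uvm_fault() on the
 * machine's vmspace and the VCPU is re-entered, so the emulator never
 * sees them. Every other exit reason, or a fault UVM cannot satisfy,
 * is bounced up to userland. Pending signals or resched requests also
 * force a return, with reason NVMM_VCPU_EXIT_NONE.
 */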
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
        struct vmspace *vm = mach->vm;
        int ret;

        while (1) {
                /* Got a signal? Or pending resched? Leave. */
                if (__predict_false(nvmm_return_needed())) {
                        exit->reason = NVMM_VCPU_EXIT_NONE;
                        return 0;
                }

                /* Run the VCPU. */
                ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
                if (__predict_false(ret != 0)) {
                        return ret;
                }

                /* Process nested page faults. */
                if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
                        break;
                }
                if (exit->u.mem.gpa >= mach->gpa_end) {
                        break;
                }
                if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
                        break;
                }
        }

        return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }
                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        *off = hva - hmapping->hva;
                        return hmapping->uobj;
                }
        }

        return NULL;
}

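/*
 * Validate a prospective host mapping: the range must be page-aligned
 * and non-NULL. Against each existing mapping, two outcomes are
 * acceptable -- no intersection at all, or full containment within that
 * mapping. Any partial overlap (start inside, end inside, or the new
 * range engulfing an existing one) fails with EEXIST.
 */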
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
                return EINVAL;
        }
        if (hva == 0) {
                return EINVAL;
        }

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }

                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        break;
                }

                if (hva >= hmapping->hva &&
                    hva < hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva + size > hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva <= hmapping->hva &&
                    hva + size >= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
        }

        return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        hmapping->present = true;
                        return hmapping;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present || hmapping->hva != hva ||
                    hmapping->size != size) {
                        continue;
                }

                uvm_unmap(&vmspace->vm_map, hmapping->hva,
                    hmapping->hva + hmapping->size);
                uao_detach(hmapping->uobj);

                hmapping->uobj = NULL;
                hmapping->present = false;

                return 0;
        }

        return ENOENT;
}

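/*
 * Register a host mapping: back args->hva with a fresh anonymous uobj
 * and map it, fixed and pageable, over that address range in the
 * calling process. Two references end up on the uobj: the one from
 * uao_create() belongs to the machine (dropped in nvmm_hmapping_free()
 * or at machine destruction), and the extra uao_reference() is consumed
 * by the user-space mapping itself.
 */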
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_machine *mach;
        struct nvmm_hmapping *hmapping;
        vaddr_t uva;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_validate(mach, args->hva, args->size);
        if (error)
                goto out;

        hmapping = nvmm_hmapping_alloc(mach);
        if (hmapping == NULL) {
                error = ENOBUFS;
                goto out;
        }

        hmapping->hva = args->hva;
        hmapping->size = args->size;
        hmapping->uobj = uao_create(hmapping->size, 0);
        uva = hmapping->hva;

        /* Take a reference for the user. */
        uao_reference(hmapping->uobj);

        /* Map the uobj into the user address space, as pageable. */
        error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
            0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(hmapping->uobj);
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_free(mach, args->hva, args->size);

        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

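/*
 * Map guest-physical memory: translate args->hva through the registered
 * host mappings into a (uobj, offset) pair, then enter that uobj at
 * args->gpa in the machine's vmspace, fixed and pageable. The HVA range
 * must lie entirely within a single host mapping, and the GPA range
 * must fit in [gpa_begin, gpa_end).
 */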
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
        struct nvmm_machine *mach;
        struct uvm_object *uobj;
        gpaddr_t gpa;
        size_t off;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
                error = EINVAL;
                goto out;
        }

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
            (args->hva % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->hva == 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
        if (uobj == NULL) {
                error = EINVAL;
                goto out;
        }

        /* Take a reference for the machine. */
        uao_reference(uobj);

        /* Map the uobj into the machine address space, as pageable. */
        error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
            UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(uobj);
                goto out;
        }
        if (gpa != args->gpa) {
                uao_detach(uobj);
                printf("[!] uvm_map problem\n");
                error = EINVAL;
                goto out;
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
        struct nvmm_machine *mach;
        gpaddr_t gpa;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        /* Unmap the memory from the machine. */
        uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

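/*
 * NVMM_CTL_MACH_INFO: report a machine's VCPU count, the total RAM
 * registered through host mappings, its owner's PID and its creation
 * time. The usual ownership check applies, so arbitrary machines are
 * only visible through a root_owner descriptor.
 */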
static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        struct nvmm_ctl_mach_info ctl;
        struct nvmm_machine *mach;
        int error;
        size_t i;

        if (args->size != sizeof(ctl))
                return EINVAL;
        error = copyin(args->data, &ctl, sizeof(ctl));
        if (error)
                return error;

        error = nvmm_machine_get(owner, ctl.machid, &mach, true);
        if (error)
                return error;

        ctl.nvcpus = mach->ncpus;

        ctl.nram = 0;
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                ctl.nram += mach->hmap[i].size;
        }

        ctl.pid = mach->owner->pid;
        ctl.time = mach->time;

        nvmm_machine_put(mach);

        error = copyout(&ctl, args->data, sizeof(ctl));
        if (error)
                return error;

        return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        switch (args->op) {
        case NVMM_CTL_MACH_INFO:
                return nvmm_ctl_mach_info(owner, args);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

static const struct nvmm_impl *
nvmm_ident(void)
{
        size_t i;

        for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
                if ((*nvmm_impl_list[i]->ident)())
                        return nvmm_impl_list[i];
        }

        return NULL;
}

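/*
 * Select and initialize a backend. nvmm_ident() picks the first entry
 * of nvmm_impl_list[] whose ident() callback accepts the host CPU (SVM
 * is probed before VMX), so at most one backend is ever active.
 */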
static int
nvmm_init(void)
{
        size_t i, n;

        nvmm_impl = nvmm_ident();
        if (nvmm_impl == NULL)
                return ENOTSUP;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                machines[i].machid = i;
                rw_init(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        machines[i].cpus[n].present = false;
                        machines[i].cpus[n].cpuid = n;
                        mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
                            IPL_NONE);
                }
        }

        (*nvmm_impl->init)();

        return 0;
}

static void
nvmm_fini(void)
{
        size_t i, n;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                rw_destroy(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        mutex_destroy(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->fini)();
        nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static dev_type_open(nvmm_open);

const struct cdevsw nvmm_cdevsw = {
        .d_open = nvmm_open,
        .d_close = noclose,
        .d_read = noread,
        .d_write = nowrite,
        .d_ioctl = noioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
        .d_flag = D_OTHER | D_MPSAFE
};

static int nvmm_ioctl(file_t *, u_long, void *);
static int nvmm_close(file_t *);
static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
    struct uvm_object **, int *);

static const struct fileops nvmm_fileops = {
        .fo_read = fbadop_read,
        .fo_write = fbadop_write,
        .fo_ioctl = nvmm_ioctl,
        .fo_fcntl = fnullop_fcntl,
        .fo_poll = fnullop_poll,
        .fo_stat = fbadop_stat,
        .fo_close = nvmm_close,
        .fo_kqfilter = fnullop_kqfilter,
        .fo_restart = fnullop_restart,
        .fo_mmap = nvmm_mmap,
};

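/*
 * Open /dev/nvmm. O_CLOEXEC is mandatory, so descriptors are never
 * inherited across exec. A read-write open allocates a per-process
 * owner keyed on the PID; a write-only open is instead given the global
 * root_owner, which nvmm_machine_get() exempts from ownership checks --
 * presumably for management tools that inspect machines they did not
 * create.
 */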
static int
nvmm_open(dev_t dev, int flags, int type, struct lwp *l)
{
        struct nvmm_owner *owner;
        struct file *fp;
        int error, fd;

        if (__predict_false(nvmm_impl == NULL))
                return ENXIO;
        if (minor(dev) != 0)
                return EXDEV;
        if (!(flags & O_CLOEXEC))
                return EINVAL;
        error = fd_allocfile(&fp, &fd);
        if (error)
                return error;

        if (OFLAGS(flags) & O_WRONLY) {
                owner = &root_owner;
        } else {
                owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
                owner->pid = l->l_proc->p_pid;
        }

        return fd_clone(fp, fd, flags, &nvmm_fileops, owner);
}

static int
nvmm_close(file_t *fp)
{
        struct nvmm_owner *owner = fp->f_data;

        KASSERT(owner != NULL);
        nvmm_kill_machines(owner);
        if (owner != &root_owner) {
                kmem_free(owner, sizeof(*owner));
        }
        fp->f_data = NULL;

        return 0;
}

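/*
 * mmap() on the device maps a VCPU's comm page into userland. The
 * target machine and VCPU are packed into the file offset and decoded
 * with the NVMM_COMM_MACHID()/NVMM_COMM_CPUID() macros from the nvmm
 * headers; exactly one page may be mapped, and never executable.
 */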
static int
nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
        struct nvmm_owner *owner = fp->f_data;
        struct nvmm_machine *mach;
        nvmm_machid_t machid;
        nvmm_cpuid_t cpuid;
        int error;

        if (prot & PROT_EXEC)
                return EACCES;
        if (size != PAGE_SIZE)
                return EINVAL;

        cpuid = NVMM_COMM_CPUID(*offp);
        if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
                return EINVAL;

        machid = NVMM_COMM_MACHID(*offp);
        error = nvmm_machine_get(owner, machid, &mach, false);
        if (error)
                return error;

        uao_reference(mach->commuobj);
        *uobjp = mach->commuobj;
        *offp = cpuid * PAGE_SIZE;
        *maxprotp = prot;
        *advicep = UVM_ADV_RANDOM;

        nvmm_machine_put(mach);
        return 0;
}

static int
nvmm_ioctl(file_t *fp, u_long cmd, void *data)
{
        struct nvmm_owner *owner = fp->f_data;

        KASSERT(owner != NULL);

        switch (cmd) {
        case NVMM_IOC_CAPABILITY:
                return nvmm_capability(owner, data);
        case NVMM_IOC_MACHINE_CREATE:
                return nvmm_machine_create(owner, data);
        case NVMM_IOC_MACHINE_DESTROY:
                return nvmm_machine_destroy(owner, data);
        case NVMM_IOC_MACHINE_CONFIGURE:
                return nvmm_machine_configure(owner, data);
        case NVMM_IOC_VCPU_CREATE:
                return nvmm_vcpu_create(owner, data);
        case NVMM_IOC_VCPU_DESTROY:
                return nvmm_vcpu_destroy(owner, data);
        case NVMM_IOC_VCPU_CONFIGURE:
                return nvmm_vcpu_configure(owner, data);
        case NVMM_IOC_VCPU_SETSTATE:
                return nvmm_vcpu_setstate(owner, data);
        case NVMM_IOC_VCPU_GETSTATE:
                return nvmm_vcpu_getstate(owner, data);
        case NVMM_IOC_VCPU_INJECT:
                return nvmm_vcpu_inject(owner, data);
        case NVMM_IOC_VCPU_RUN:
                return nvmm_vcpu_run(owner, data);
        case NVMM_IOC_GPA_MAP:
                return nvmm_gpa_map(owner, data);
        case NVMM_IOC_GPA_UNMAP:
                return nvmm_gpa_unmap(owner, data);
        case NVMM_IOC_HVA_MAP:
                return nvmm_hva_map(owner, data);
        case NVMM_IOC_HVA_UNMAP:
                return nvmm_hva_unmap(owner, data);
        case NVMM_IOC_CTL:
                return nvmm_ctl(owner, data);
        default:
                return EINVAL;
        }
}

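/*
 * For reference, a minimal sketch of how an emulator could drive this
 * ioctl interface from userland (real consumers normally go through
 * libnvmm; raw ioctls are shown here, with struct fields as used above
 * and the comm-page mmap offset encoded per the NVMM_COMM_* macros):
 *
 *      int fd = open("/dev/nvmm", O_RDWR | O_CLOEXEC);
 *
 *      struct nvmm_ioc_machine_create mc;
 *      ioctl(fd, NVMM_IOC_MACHINE_CREATE, &mc);     // fills in mc.machid
 *
 *      struct nvmm_ioc_hva_map hm = { .machid = mc.machid,
 *          .hva = (uintptr_t)buf, .size = len };
 *      ioctl(fd, NVMM_IOC_HVA_MAP, &hm);            // register host RAM
 *
 *      struct nvmm_ioc_gpa_map gm = { .machid = mc.machid,
 *          .hva = (uintptr_t)buf, .gpa = 0, .size = len,
 *          .prot = PROT_READ | PROT_WRITE | PROT_EXEC };
 *      ioctl(fd, NVMM_IOC_GPA_MAP, &gm);            // expose it to the guest
 *
 *      struct nvmm_ioc_vcpu_create vc = { .machid = mc.machid, .cpuid = 0 };
 *      ioctl(fd, NVMM_IOC_VCPU_CREATE, &vc);
 *
 *      struct nvmm_ioc_vcpu_run vr = { .machid = mc.machid, .cpuid = 0 };
 *      for (;;) {
 *              ioctl(fd, NVMM_IOC_VCPU_RUN, &vr);
 *              // dispatch on vr.exit.reason; NVMM_VCPU_EXIT_NONE means a
 *              // signal or resched interrupted the run loop, see above
 *      }
 */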
/* -------------------------------------------------------------------------- */

static int nvmm_match(device_t, cfdata_t, void *);
static void nvmm_attach(device_t, device_t, void *);
static int nvmm_detach(device_t, int);

extern struct cfdriver nvmm_cd;

CFATTACH_DECL_NEW(nvmm, 0, nvmm_match, nvmm_attach, nvmm_detach, NULL);

static struct cfdata nvmm_cfdata[] = {
        {
                .cf_name = "nvmm",
                .cf_atname = "nvmm",
                .cf_unit = 0,
                .cf_fstate = FSTATE_STAR,
                .cf_loc = NULL,
                .cf_flags = 0,
                .cf_pspec = NULL,
        },
        { NULL, NULL, 0, FSTATE_NOTFOUND, NULL, 0, NULL }
};

static int
nvmm_match(device_t self, cfdata_t cfdata, void *arg)
{
        return 1;
}

static void
nvmm_attach(device_t parent, device_t self, void *aux)
{
        int error;

        error = nvmm_init();
        if (error)
                panic("%s: impossible", __func__);
        aprint_normal_dev(self, "attached, using backend %s\n",
            nvmm_impl->name);
}

static int
nvmm_detach(device_t self, int flags)
{
        if (atomic_load_acq_int(&nmachines) > 0)
                return EBUSY;
        nvmm_fini();
        return 0;
}

void
nvmmattach(int nunits)
{
        /* nothing */
}

MODULE(MODULE_CLASS_MISC, nvmm, NULL);

#if defined(_MODULE)
CFDRIVER_DECL(nvmm, DV_VIRTUAL, NULL);
#endif

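/*
 * Module handler. MODULE_CMD_INIT performs the attach sequence --
 * cfdriver, cfattach, cfdata, pseudo-device, and (when built as a
 * loadable module) the character devsw at the fixed major 345 -- with
 * each step unwinding the previous ones on failure. Autounload is
 * refused outright; nvmm_detach() separately refuses to detach while
 * machines still exist.
 */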
static int
nvmm_modcmd(modcmd_t cmd, void *arg)
{
#if defined(_MODULE)
        devmajor_t bmajor = NODEVMAJOR;
        devmajor_t cmajor = 345;
#endif
        int error;

        switch (cmd) {
        case MODULE_CMD_INIT:
                if (nvmm_ident() == NULL) {
                        aprint_error("%s: cpu not supported\n",
                            nvmm_cd.cd_name);
                        return ENOTSUP;
                }
#if defined(_MODULE)
                error = config_cfdriver_attach(&nvmm_cd);
                if (error)
                        return error;
#endif
                error = config_cfattach_attach(nvmm_cd.cd_name, &nvmm_ca);
                if (error) {
                        config_cfdriver_detach(&nvmm_cd);
                        aprint_error("%s: config_cfattach_attach failed\n",
                            nvmm_cd.cd_name);
                        return error;
                }

                error = config_cfdata_attach(nvmm_cfdata, 1);
                if (error) {
                        config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
                        config_cfdriver_detach(&nvmm_cd);
                        aprint_error("%s: unable to register cfdata\n",
                            nvmm_cd.cd_name);
                        return error;
                }

                if (config_attach_pseudo(nvmm_cfdata) == NULL) {
                        aprint_error("%s: config_attach_pseudo failed\n",
                            nvmm_cd.cd_name);
                        config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
                        config_cfdriver_detach(&nvmm_cd);
                        return ENXIO;
                }

#if defined(_MODULE)
                /* mknod /dev/nvmm c 345 0 */
                error = devsw_attach(nvmm_cd.cd_name, NULL, &bmajor,
                    &nvmm_cdevsw, &cmajor);
                if (error) {
                        aprint_error("%s: unable to register devsw\n",
                            nvmm_cd.cd_name);
                        config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
                        config_cfdriver_detach(&nvmm_cd);
                        return error;
                }
#endif
                return 0;
        case MODULE_CMD_FINI:
                error = config_cfdata_detach(nvmm_cfdata);
                if (error)
                        return error;
                error = config_cfattach_detach(nvmm_cd.cd_name, &nvmm_ca);
                if (error)
                        return error;
#if defined(_MODULE)
                config_cfdriver_detach(&nvmm_cd);
                devsw_detach(NULL, &nvmm_cdevsw);
#endif
                return 0;
        case MODULE_CMD_AUTOUNLOAD:
                return EBUSY;
        default:
                return ENOTTY;
        }
}