nvmm - Move *_fpu_enter/leave inside the cli/sti
sys/dev/virtual/nvmm/nvmm.c
/*      $NetBSD: nvmm.c,v 1.43 2021/04/12 09:22:58 mrg Exp $    */

/*
 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
 * All rights reserved.
 *
 * This code is part of the NVMM hypervisor.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/devfs.h>
#include <sys/device.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/thread.h>

#include <dev/virtual/nvmm/nvmm_compat.h>
#include <dev/virtual/nvmm/nvmm.h>
#include <dev/virtual/nvmm/nvmm_internal.h>
#include <dev/virtual/nvmm/nvmm_ioctl.h>

MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data");

static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
        &nvmm_x86_svm,  /* x86 AMD SVM */
        &nvmm_x86_vmx   /* x86 Intel VMX */
#endif
};

static const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

static struct nvmm_owner root_owner;

/* -------------------------------------------------------------------------- */

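/*
 * Allocate a machine slot. Scans the fixed-size machines[] array for a
 * free entry. On success the machine is returned with its rwlock held
 * as writer; the caller releases it with nvmm_machine_put().
 */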
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
        struct nvmm_machine *mach;
        size_t i;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (mach->present) {
                        rw_exit(&mach->lock);
                        continue;
                }

                mach->present = true;
                mach->time = time_second;
                *ret = mach;
                atomic_inc_uint(&nmachines);
                return 0;
        }

        return ENOBUFS;
}

static void
nvmm_machine_free(struct nvmm_machine *mach)
{
        KASSERT(rw_write_held(&mach->lock));
        KASSERT(mach->present);
        mach->present = false;
        atomic_dec_uint(&nmachines);
}

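/*
 * Look up a machine by ID and lock it (reader or writer). Fails with
 * ENOENT if the slot is empty, and EPERM if the caller does not own the
 * machine; root_owner bypasses the ownership check.
 */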
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
        struct nvmm_machine *mach;
        krw_t op = writer ? RW_WRITER : RW_READER;

        if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
                return EINVAL;
        }
        mach = &machines[machid];

        rw_enter(&mach->lock, op);
        if (__predict_false(!mach->present)) {
                rw_exit(&mach->lock);
                return ENOENT;
        }
        if (__predict_false(mach->owner != owner && owner != &root_owner)) {
                rw_exit(&mach->lock);
                return EPERM;
        }
        *ret = mach;

        return 0;
}

static void
nvmm_machine_put(struct nvmm_machine *mach)
{
        rw_exit(&mach->lock);
}

/* -------------------------------------------------------------------------- */

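/*
 * Allocate a VCPU slot within a machine. As with machines, the VCPU is
 * returned with its mutex held; nvmm_vcpu_put() releases it.
 */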
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (cpuid >= NVMM_MAX_VCPUS) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (vcpu->present) {
                mutex_exit(&vcpu->lock);
                return EBUSY;
        }

        vcpu->present = true;
        vcpu->comm = NULL;
        vcpu->hcpu_last = -1;
        *ret = vcpu;
        return 0;
}

static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
        KASSERT(mutex_owned(&vcpu->lock));
        vcpu->present = false;
        if (vcpu->comm != NULL) {
                uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
        }
}

static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
        struct nvmm_cpu *vcpu;

        if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
                return EINVAL;
        }
        vcpu = &mach->cpus[cpuid];

        mutex_enter(&vcpu->lock);
        if (__predict_false(!vcpu->present)) {
                mutex_exit(&vcpu->lock);
                return ENOENT;
        }
        *ret = vcpu;

        return 0;
}

static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
        mutex_exit(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

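/*
 * Tear down every machine belonging to 'owner'. Called from nvmm_dtor()
 * when the owner's file descriptor goes away, so an exiting process
 * cannot leak machines, VCPUs, or the host mappings' UAO references.
 */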
static void
nvmm_kill_machines(struct nvmm_owner *owner)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t i, j;
        int error;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                mach = &machines[i];

                rw_enter(&mach->lock, RW_WRITER);
                if (!mach->present || mach->owner != owner) {
                        rw_exit(&mach->lock);
                        continue;
                }

                /* Kill it. */
                for (j = 0; j < NVMM_MAX_VCPUS; j++) {
                        error = nvmm_vcpu_get(mach, j, &vcpu);
                        if (error)
                                continue;
                        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                        nvmm_vcpu_free(mach, vcpu);
                        nvmm_vcpu_put(vcpu);
                        atomic_dec_uint(&mach->ncpus);
                }
                (*nvmm_impl->machine_destroy)(mach);
                uvmspace_free(mach->vm);

                /* Drop the kernel UOBJ refs. */
                for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
                        if (!mach->hmap[j].present)
                                continue;
                        uao_detach(mach->hmap[j].uobj);
                }

                nvmm_machine_free(mach);

                rw_exit(&mach->lock);
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
        args->cap.version = NVMM_KERN_VERSION;
        args->cap.state_size = nvmm_impl->state_size;
        args->cap.max_machines = NVMM_MAX_MACHINES;
        args->cap.max_vcpus = NVMM_MAX_VCPUS;
        args->cap.max_ram = NVMM_MAX_RAM;

        (*nvmm_impl->capability)(&args->cap);

        return 0;
}

static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_alloc(&mach);
        if (error)
                return error;

        /* Curproc owns the machine. */
        mach->owner = owner;

        /* Zero out the host mappings. */
        memset(&mach->hmap, 0, sizeof(mach->hmap));

        /* Create the machine vmspace. */
        mach->gpa_begin = 0;
        mach->gpa_end = NVMM_MAX_RAM;
        mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);

        /* Create the comm uobj. */
        mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);

        (*nvmm_impl->machine_create)(mach);

        args->machid = mach->machid;
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;
        size_t i;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        for (i = 0; i < NVMM_MAX_VCPUS; i++) {
                error = nvmm_vcpu_get(mach, i, &vcpu);
                if (error)
                        continue;

                (*nvmm_impl->vcpu_destroy)(mach, vcpu);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                atomic_dec_uint(&mach->ncpus);
        }

        (*nvmm_impl->machine_destroy)(mach);

        /* Free the machine vmspace. */
        uvmspace_free(mach->vm);

        /* Drop the kernel UOBJ refs. */
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                uao_detach(mach->hmap[i].uobj);
        }

        nvmm_machine_free(mach);
        nvmm_machine_put(mach);

        return 0;
}

static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
        struct nvmm_machine *mach;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_MACH_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
                return EINVAL;
        }

        allocsz = nvmm_impl->mach_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

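/*
 * Create a VCPU. The per-VCPU comm page is carved out of the machine's
 * comm uobj, mapped into the kernel at vcpu->comm, wired, and zeroed
 * before the backend's vcpu_create hook runs.
 */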
static int
nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        /* Allocate the comm page. */
        uao_reference(mach->commuobj);
        error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
            mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
            UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
        if (error) {
                uao_detach(mach->commuobj);
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
            (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }
        memset(vcpu->comm, 0, PAGE_SIZE);

        error = (*nvmm_impl->vcpu_create)(mach, vcpu);
        if (error) {
                nvmm_vcpu_free(mach, vcpu);
                nvmm_vcpu_put(vcpu);
                goto out;
        }

        nvmm_vcpu_put(vcpu);
        atomic_inc_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_destroy)(mach, vcpu);
        nvmm_vcpu_free(mach, vcpu);
        nvmm_vcpu_put(vcpu);
        atomic_dec_uint(&mach->ncpus);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_configure *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        size_t allocsz;
        uint64_t op;
        void *data;
        int error;

        op = NVMM_VCPU_CONF_MD(args->op);
        if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
                return EINVAL;

        allocsz = nvmm_impl->vcpu_conf_sizes[op];
        data = kmem_alloc(allocsz, KM_SLEEP);

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error) {
                kmem_free(data, allocsz);
                return error;
        }

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error) {
                nvmm_machine_put(mach);
                kmem_free(data, allocsz);
                return error;
        }

        error = copyin(args->conf, data, allocsz);
        if (error) {
                goto out;
        }

        error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);

out:
        nvmm_vcpu_put(vcpu);
        nvmm_machine_put(mach);
        kmem_free(data, allocsz);
        return error;
}

static int
nvmm_vcpu_setstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_setstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_setstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_getstate(struct nvmm_owner *owner,
    struct nvmm_ioc_vcpu_getstate *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        (*nvmm_impl->vcpu_getstate)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = (*nvmm_impl->vcpu_inject)(vcpu);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

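/*
 * Inner run loop. Re-enters the guest until the backend reports an exit
 * that userland must handle. Nested page faults on a GPA inside the
 * machine's RAM window are resolved in-kernel with uvm_fault() and the
 * guest is resumed; signals and reschedule requests bail out with
 * NVMM_VCPU_EXIT_NONE so userland regains control.
 */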
static int
nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
        struct vmspace *vm = mach->vm;
        int ret;

        while (1) {
                /* Got a signal? Or pending resched? Leave. */
                if (__predict_false(nvmm_return_needed())) {
                        exit->reason = NVMM_VCPU_EXIT_NONE;
                        return 0;
                }

                /* Run the VCPU. */
                ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
                if (__predict_false(ret != 0)) {
                        return ret;
                }

                /* Process nested page faults. */
                if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
                        break;
                }
                if (exit->u.mem.gpa >= mach->gpa_end) {
                        break;
                }
                if (uvm_fault(&vm->vm_map, exit->u.mem.gpa, exit->u.mem.prot)) {
                        break;
                }
        }

        return 0;
}

static int
nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
{
        struct nvmm_machine *mach;
        struct nvmm_cpu *vcpu;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
        if (error)
                goto out;

        error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
        nvmm_vcpu_put(vcpu);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static struct uvm_object *
nvmm_hmapping_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
    size_t *off)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }
                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        *off = hva - hmapping->hva;
                        return hmapping->uobj;
                }
        }

        return NULL;
}

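/*
 * Validate a host mapping candidate: the HVA and size must be page-
 * aligned and the HVA non-NULL. A range fully contained within an
 * existing mapping passes validation; any partial overlap with an
 * existing mapping is rejected with EEXIST.
 */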
static int
nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
                return EINVAL;
        }
        if (hva == 0) {
                return EINVAL;
        }

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        continue;
                }

                if (hva >= hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        break;
                }

                if (hva >= hmapping->hva &&
                    hva < hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva + size > hmapping->hva &&
                    hva + size <= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
                if (hva <= hmapping->hva &&
                    hva + size >= hmapping->hva + hmapping->size) {
                        return EEXIST;
                }
        }

        return 0;
}

static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine *mach)
{
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present) {
                        hmapping->present = true;
                        return hmapping;
                }
        }

        return NULL;
}

static int
nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_hmapping *hmapping;
        size_t i;

        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                hmapping = &mach->hmap[i];
                if (!hmapping->present || hmapping->hva != hva ||
                    hmapping->size != size) {
                        continue;
                }

                uvm_unmap(&vmspace->vm_map, hmapping->hva,
                    hmapping->hva + hmapping->size);
                uao_detach(hmapping->uobj);

                hmapping->uobj = NULL;
                hmapping->present = false;

                return 0;
        }

        return ENOENT;
}

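/*
 * NVMM_IOC_HVA_MAP: back a range of the caller's address space with an
 * anonymous UVM object. The uobj keeps one reference for the machine
 * (dropped at teardown) plus one taken here for the user mapping.
 */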
static int
nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
{
        struct vmspace *vmspace = curproc->p_vmspace;
        struct nvmm_machine *mach;
        struct nvmm_hmapping *hmapping;
        vaddr_t uva;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_validate(mach, args->hva, args->size);
        if (error)
                goto out;

        hmapping = nvmm_hmapping_alloc(mach);
        if (hmapping == NULL) {
                error = ENOBUFS;
                goto out;
        }

        hmapping->hva = args->hva;
        hmapping->size = args->size;
        hmapping->uobj = uao_create(hmapping->size, 0);
        uva = hmapping->hva;

        /* Take a reference for the user. */
        uao_reference(hmapping->uobj);

        /* Map the uobj into the user address space, as pageable. */
        error = uvm_map(&vmspace->vm_map, &uva, hmapping->size, hmapping->uobj,
            0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(hmapping->uobj);
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
{
        struct nvmm_machine *mach;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, true);
        if (error)
                return error;

        error = nvmm_hmapping_free(mach, args->hva, args->size);

        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

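/*
 * NVMM_IOC_GPA_MAP: map a range previously registered with HVA_MAP into
 * the guest physical address space. The GPA range must be page-aligned,
 * must not wrap, and must lie within [gpa_begin, gpa_end).
 */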
static int
nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
{
        struct nvmm_machine *mach;
        struct uvm_object *uobj;
        gpaddr_t gpa;
        size_t off;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
                error = EINVAL;
                goto out;
        }

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
            (args->hva % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->hva == 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size > mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        uobj = nvmm_hmapping_getuobj(mach, args->hva, args->size, &off);
        if (uobj == NULL) {
                error = EINVAL;
                goto out;
        }

        /* Take a reference for the machine. */
        uao_reference(uobj);

        /* Map the uobj into the machine address space, as pageable. */
        error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0,
            UVM_MAPFLAG(args->prot, UVM_PROT_RWX, UVM_INH_NONE,
            UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP));
        if (error) {
                uao_detach(uobj);
                goto out;
        }
        if (gpa != args->gpa) {
                uao_detach(uobj);
                printf("[!] uvm_map problem\n");
                error = EINVAL;
                goto out;
        }

out:
        nvmm_machine_put(mach);
        return error;
}

static int
nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
{
        struct nvmm_machine *mach;
        gpaddr_t gpa;
        int error;

        error = nvmm_machine_get(owner, args->machid, &mach, false);
        if (error)
                return error;

        if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa < mach->gpa_begin || args->gpa >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size <= args->gpa) {
                error = EINVAL;
                goto out;
        }
        if (args->gpa + args->size >= mach->gpa_end) {
                error = EINVAL;
                goto out;
        }
        gpa = args->gpa;

        /* Unmap the memory from the machine. */
        uvm_unmap(&mach->vm->vm_map, gpa, gpa + args->size);

out:
        nvmm_machine_put(mach);
        return error;
}

/* -------------------------------------------------------------------------- */

static int
nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        struct nvmm_ctl_mach_info ctl;
        struct nvmm_machine *mach;
        int error;
        size_t i;

        if (args->size != sizeof(ctl))
                return EINVAL;
        error = copyin(args->data, &ctl, sizeof(ctl));
        if (error)
                return error;

        error = nvmm_machine_get(owner, ctl.machid, &mach, true);
        if (error)
                return error;

        ctl.nvcpus = mach->ncpus;

        ctl.nram = 0;
        for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
                if (!mach->hmap[i].present)
                        continue;
                ctl.nram += mach->hmap[i].size;
        }

        ctl.pid = mach->owner->pid;
        ctl.time = mach->time;

        nvmm_machine_put(mach);

        error = copyout(&ctl, args->data, sizeof(ctl));
        if (error)
                return error;

        return 0;
}

static int
nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
{
        switch (args->op) {
        case NVMM_CTL_MACH_INFO:
                return nvmm_ctl_mach_info(owner, args);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

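/*
 * Pick the backend matching the host CPU: each entry's ident() probes
 * for the corresponding virtualization extension (SVM or VMX).
 */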
static const struct nvmm_impl *
nvmm_ident(void)
{
        size_t i;

        for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
                if ((*nvmm_impl_list[i]->ident)())
                        return nvmm_impl_list[i];
        }

        return NULL;
}

static int
nvmm_init(void)
{
        size_t i, n;

        nvmm_impl = nvmm_ident();
        if (nvmm_impl == NULL)
                return ENOTSUP;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                machines[i].machid = i;
                rw_init(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        machines[i].cpus[n].present = false;
                        machines[i].cpus[n].cpuid = n;
                        mutex_init(&machines[i].cpus[n].lock, MUTEX_DEFAULT,
                            IPL_NONE);
                }
        }

        (*nvmm_impl->init)();

        return 0;
}

static void
nvmm_fini(void)
{
        size_t i, n;

        for (i = 0; i < NVMM_MAX_MACHINES; i++) {
                rw_destroy(&machines[i].lock);
                for (n = 0; n < NVMM_MAX_VCPUS; n++) {
                        mutex_destroy(&machines[i].cpus[n].lock);
                }
        }

        (*nvmm_impl->fini)();
        nvmm_impl = NULL;
}

/* -------------------------------------------------------------------------- */

static d_open_t nvmm_open;
static d_ioctl_t nvmm_ioctl;
static d_mmap_single_t nvmm_mmap_single;
static d_priv_dtor_t nvmm_dtor;

static struct dev_ops nvmm_ops = {
        { "nvmm", 0, D_MPSAFE },
        .d_open         = nvmm_open,
        .d_ioctl        = nvmm_ioctl,
        .d_mmap_single  = nvmm_mmap_single,
};

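/*
 * Open /dev/nvmm. O_CLOEXEC is mandatory, so the descriptor cannot leak
 * across exec. Root gets the shared root_owner (which may access any
 * machine); other users get a per-open owner tied to their PID, attached
 * to the file via cdevpriv and destroyed by nvmm_dtor() on last close.
 */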
static int
nvmm_open(struct dev_open_args *ap)
{
        int flags = ap->a_oflags;
        struct nvmm_owner *owner;
        struct file *fp;
        int error;

        if (__predict_false(nvmm_impl == NULL))
                return ENXIO;
        if (!(flags & O_CLOEXEC))
                return EINVAL;

        if (priv_check_cred(ap->a_cred, PRIV_ROOT, 0) == 0) {
                owner = &root_owner;
        } else {
                owner = kmem_alloc(sizeof(*owner), KM_SLEEP);
                owner->pid = curthread->td_proc->p_pid;
        }

        fp = ap->a_fpp ? *ap->a_fpp : NULL;
        error = devfs_set_cdevpriv(fp, owner, nvmm_dtor);
        if (error) {
                nvmm_dtor(owner);
                return error;
        }

        return 0;
}

static void
nvmm_dtor(void *arg)
{
        struct nvmm_owner *owner = arg;

        KASSERT(owner != NULL);
        nvmm_kill_machines(owner);
        if (owner != &root_owner) {
                kmem_free(owner, sizeof(*owner));
        }
}

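/*
 * mmap() on /dev/nvmm exposes a VCPU comm page to userland. The machine
 * and VCPU IDs are encoded in the file offset (NVMM_COMM_MACHID and
 * NVMM_COMM_CPUID); the offset is rewritten so the caller maps the
 * right page of the machine's comm uobj.
 */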
static int
nvmm_mmap_single(struct dev_mmap_single_args *ap)
{
        vm_ooffset_t *offp = ap->a_offset;
        size_t size = ap->a_size;
        int prot = ap->a_nprot;
        struct vm_object **uobjp = ap->a_object;
        struct file *fp = ap->a_fp;
        struct nvmm_owner *owner = NULL;
        struct nvmm_machine *mach;
        nvmm_machid_t machid;
        nvmm_cpuid_t cpuid;
        int error;

        devfs_get_cdevpriv(fp, (void **)&owner);
        KASSERT(owner != NULL);

        if (prot & PROT_EXEC)
                return EACCES;
        if (size != PAGE_SIZE)
                return EINVAL;

        cpuid = NVMM_COMM_CPUID(*offp);
        if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
                return EINVAL;

        machid = NVMM_COMM_MACHID(*offp);
        error = nvmm_machine_get(owner, machid, &mach, false);
        if (error)
                return error;

        uao_reference(mach->commuobj);
        *uobjp = mach->commuobj;
        *offp = cpuid * PAGE_SIZE;

        nvmm_machine_put(mach);
        return 0;
}

static int
nvmm_ioctl(struct dev_ioctl_args *ap)
{
        unsigned long cmd = ap->a_cmd;
        void *data = ap->a_data;
        struct file *fp = ap->a_fp;
        struct nvmm_owner *owner = NULL;

        devfs_get_cdevpriv(fp, (void **)&owner);
        KASSERT(owner != NULL);

        switch (cmd) {
        case NVMM_IOC_CAPABILITY:
                return nvmm_capability(owner, data);
        case NVMM_IOC_MACHINE_CREATE:
                return nvmm_machine_create(owner, data);
        case NVMM_IOC_MACHINE_DESTROY:
                return nvmm_machine_destroy(owner, data);
        case NVMM_IOC_MACHINE_CONFIGURE:
                return nvmm_machine_configure(owner, data);
        case NVMM_IOC_VCPU_CREATE:
                return nvmm_vcpu_create(owner, data);
        case NVMM_IOC_VCPU_DESTROY:
                return nvmm_vcpu_destroy(owner, data);
        case NVMM_IOC_VCPU_CONFIGURE:
                return nvmm_vcpu_configure(owner, data);
        case NVMM_IOC_VCPU_SETSTATE:
                return nvmm_vcpu_setstate(owner, data);
        case NVMM_IOC_VCPU_GETSTATE:
                return nvmm_vcpu_getstate(owner, data);
        case NVMM_IOC_VCPU_INJECT:
                return nvmm_vcpu_inject(owner, data);
        case NVMM_IOC_VCPU_RUN:
                return nvmm_vcpu_run(owner, data);
        case NVMM_IOC_GPA_MAP:
                return nvmm_gpa_map(owner, data);
        case NVMM_IOC_GPA_UNMAP:
                return nvmm_gpa_unmap(owner, data);
        case NVMM_IOC_HVA_MAP:
                return nvmm_hva_map(owner, data);
        case NVMM_IOC_HVA_UNMAP:
                return nvmm_hva_unmap(owner, data);
        case NVMM_IOC_CTL:
                return nvmm_ctl(owner, data);
        default:
                return EINVAL;
        }
}

/* -------------------------------------------------------------------------- */

static int
nvmm_attach(void)
{
        int error;

        error = nvmm_init();
        if (error)
                panic("%s: impossible", __func__);
        printf("nvmm: attached, using backend %s\n", nvmm_impl->name);

        return 0;
}

static int
nvmm_detach(void)
{
        if (atomic_load_acq_int(&nmachines) > 0)
                return EBUSY;

        nvmm_fini();
        return 0;
}

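/*
 * Module lifecycle. MOD_LOAD probes the CPU, initializes the backend,
 * and creates /dev/nvmm (root:nvmm, mode 0660); MOD_UNLOAD refuses to
 * detach while any machine is still alive.
 */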
static int
nvmm_modevent(module_t mod __unused, int type, void *data __unused)
{
        static cdev_t dev = NULL;
        int error;

        switch (type) {
        case MOD_LOAD:
                if (nvmm_ident() == NULL) {
                        printf("nvmm: cpu not supported\n");
                        return ENOTSUP;
                }
                error = nvmm_attach();
                if (error)
                        return error;

                dev = make_dev(&nvmm_ops, 0, UID_ROOT, GID_NVMM, 0660, "nvmm");
                if (dev == NULL) {
                        printf("nvmm: unable to create device\n");
                        error = ENOMEM;
                }
                break;

        case MOD_UNLOAD:
                if (dev == NULL)
                        return 0;
                error = nvmm_detach();
                if (error == 0)
                        destroy_dev(dev);
                break;

        case MOD_SHUTDOWN:
                error = 0;
                break;

        default:
                error = EOPNOTSUPP;
                break;
        }

        return error;
}

static moduledata_t nvmm_moddata = {
        .name = "nvmm",
        .evhand = nvmm_modevent,
        .priv = NULL,
};

DECLARE_MODULE(nvmm, nvmm_moddata, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(nvmm, NVMM_KERN_VERSION);