kernel - Implement NX (2)
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 3 Apr 2017 23:18:39 +0000 (16:18 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 3 Apr 2017 23:23:36 +0000 (16:23 -0700)
* Flesh out NX implementation for main kernel.

* Implement NX support for the vkernel.

sys/platform/pc64/x86_64/pmap.c
sys/platform/vkernel64/platform/init.c
sys/platform/vkernel64/platform/pmap.c
sys/platform/vkernel64/x86_64/trap.c
sys/sys/vkernel.h
sys/vm/vm_fault.c
sys/vm/vm_mmap.c

index 95c7caf..dd9108b 100644 (file)
@@ -4557,7 +4557,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 
        if (pmap == NULL)
                return;
-       if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+       if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == VM_PROT_NONE) {
                pmap_remove(pmap, sva, eva);
                return;
        }
index fbb49b8..909af19 100644 (file)
@@ -624,7 +624,7 @@ init_kern_memory(void)
         */
 
        base = mmap((void*)KERNEL_KVA_START, KERNEL_KVA_SIZE,
-                   PROT_READ|PROT_WRITE,
+                   PROT_READ|PROT_WRITE|PROT_EXEC,
                    MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED|MAP_TRYFIXED,
                    MemImageFd, (off_t)KERNEL_KVA_START);
 
@@ -772,7 +772,7 @@ init_kern_memory_vmm(void)
 
        /* Alloc a new stack in the lowmem */
        vkernel_stack = mmap(NULL, KERNEL_STACK_SIZE,
-                            PROT_READ|PROT_WRITE|PROT_EXEC,
+                            PROT_READ|PROT_WRITE|PROT_EXEC,
                             MAP_ANON, -1, 0);
        if (vkernel_stack == MAP_FAILED) {
                err(1, "Unable to allocate stack\n");
@@ -1671,7 +1671,7 @@ vkernel_module_memory_alloc(vm_offset_t *basep, size_t bytes)
        bzero((void *)*basep, bytes);
 #else
        *basep = (vm_offset_t)mmap((void *)0x000000000, bytes,
-                                  PROT_READ|PROT_WRITE|PROT_EXEC,
+                                  PROT_READ|PROT_WRITE|PROT_EXEC,
                                   MAP_ANON|MAP_SHARED, -1, 0);
        if ((void *)*basep == MAP_FAILED)
                return ENOMEM;
index 2c026b1..72b8f66 100644 (file)
@@ -128,7 +128,7 @@ static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va);
  */
 #define pte_prot(m, p)         \
        (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
-static int protection_codes[8];
+static uint64_t protection_codes[8];
 
 struct pmap kernel_pmap;
 
@@ -1703,7 +1703,7 @@ cpu_vmspace_alloc(struct vmspace *vm)
                panic("vmspace_create() failed");
 
        rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
-                         PROT_READ|PROT_WRITE,
+                         PROT_READ|PROT_WRITE|PROT_EXEC,
                          MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
                          MemImageFd, 0);
        if (rp == MAP_FAILED)
@@ -2228,7 +2228,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
        if (pmap == NULL)
                return;
 
-       if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
+       if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == VM_PROT_NONE) {
                pmap_remove(pmap, sva, eva);
                return;
        }
@@ -3061,7 +3061,6 @@ restart:
 void
 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 {
-       /* JG NX support? */
        if ((prot & VM_PROT_WRITE) == 0) {
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
                        pmap_clearbit(m, VPTE_RW);
@@ -3170,20 +3169,20 @@ pmap_clear_reference(vm_page_t m)
 /*
  * Miscellaneous support routines follow
  */
-
 static void
 i386_protection_init(void)
 {
-       int *kp, prot;
+       uint64_t *kp;
+       int prot;
 
        kp = protection_codes;
        for (prot = 0; prot < 8; prot++) {
                if (prot & VM_PROT_READ)
-                       *kp |= 0; /* if it's VALID is readeable */
+                       *kp |= 0;                       /* R */
                if (prot & VM_PROT_WRITE)
-                       *kp |= VPTE_RW;
-               if (prot & VM_PROT_EXECUTE)
-                       *kp |= 0; /* if it's VALID is executable */
+                       *kp |= VPTE_RW;                 /* R+W */
+               if (prot && (prot & VM_PROT_EXECUTE) == 0)
+                       *kp |= VPTE_NX;                 /* NX - !executable */
                ++kp;
        }
 }
index 4eb816e..012757d 100644 (file)
@@ -838,6 +838,8 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
 
        if (frame->tf_err & PGEX_W)
                ftype = VM_PROT_READ | VM_PROT_WRITE;
+       else if (frame->tf_err & PGEX_I)
+               ftype = VM_PROT_EXECUTE;
        else
                ftype = VM_PROT_READ;
 
index 620f940..1a478fa 100644 (file)
@@ -150,7 +150,7 @@ typedef u_long      vpte_t;
 #define VPTE_G         0x00000100      /* global bit ?? */
 #define VPTE_WIRED     0x00000200      /* wired */
 #define VPTE_MANAGED   0x00000400      /* managed bit ?? */
-
+#define VPTE_NX                0x00000800      /* no-execute bit */
 
 #endif
 
index 58bb913..6976599 100644 (file)
@@ -1302,7 +1302,8 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex,
        for (;;) {
                /*
                 * We cannot proceed if the vpte is not valid, not readable
-                * for a read fault, or not writable for a write fault.
+                * for a read fault, not writable for a write fault, or
+                * not executable for an instruction execution fault.
                 */
                if ((vpte & VPTE_V) == 0) {
                        unlock_and_deallocate(fs);
@@ -1312,6 +1313,10 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex,
                        unlock_and_deallocate(fs);
                        return (KERN_FAILURE);
                }
+               if ((fault_type & VM_PROT_EXECUTE) && (vpte & VPTE_NX)) {
+                       unlock_and_deallocate(fs);
+                       return (KERN_FAILURE);
+               }
                if ((vpte & VPTE_PS) || vshift == 0)
                        break;
 
@@ -1366,7 +1371,7 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex,
 
                        if ((fault_type & VM_PROT_WRITE) && (vpte & VPTE_RW))
                                nvpte |= VPTE_M | VPTE_A;
-                       if (fault_type & VM_PROT_READ)
+                       if (fault_type & (VM_PROT_READ | VM_PROT_EXECUTE))
                                nvpte |= VPTE_A;
                        if (vpte == nvpte)
                                break;
@@ -1399,6 +1404,12 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex,
                fs->first_prot &= ~VM_PROT_WRITE;
        }
 
+       /*
+        * Disable EXECUTE perms if NX bit is set.
+        */
+       if (vpte & VPTE_NX)
+               fs->first_prot &= ~VM_PROT_EXECUTE;
+
        /*
         * Combine remaining address bits with the vpte.
         */
index 2dbe6c3..60134ae 100644 (file)
@@ -184,8 +184,9 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
                return (EINVAL);
 
        if (flags & MAP_STACK) {
-               if ((fd != -1) ||
-                   ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
+               if (fd != -1)
+                       return (EINVAL);
+               if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
                        return (EINVAL);
                flags |= MAP_ANON;
                pos = 0;
@@ -342,7 +343,7 @@ kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
                         * credentials do we use for determination? What if
                         * proc does a setuid?
                         */
-                       maxprot = VM_PROT_EXECUTE;      /* ??? */
+                       maxprot = VM_PROT_EXECUTE;
                        if (fp->f_flag & FREAD) {
                                maxprot |= VM_PROT_READ;
                        } else if (prot & PROT_READ) {
@@ -593,10 +594,6 @@ sys_mprotect(struct mprotect_args *uap)
        addr = (vm_offset_t) uap->addr;
        size = uap->len;
        prot = uap->prot & VM_PROT_ALL;
-#if defined(VM_PROT_READ_IS_EXEC)
-       if (prot & VM_PROT_READ)
-               prot |= VM_PROT_EXECUTE;
-#endif
 
        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
@@ -1396,14 +1393,6 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
        if (flags & MAP_NOCORE)
                docow |= MAP_DISABLE_COREDUMP;
 
-#if defined(VM_PROT_READ_IS_EXEC)
-       if (prot & VM_PROT_READ)
-               prot |= VM_PROT_EXECUTE;
-
-       if (maxprot & VM_PROT_READ)
-               maxprot |= VM_PROT_EXECUTE;
-#endif
-
        /*
         * This may place the area in its own page directory if (size) is
         * large enough, otherwise it typically returns its argument.