From d6924570fb487b28d73396f15725fb0b111082dc Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 2 May 2019 23:03:32 -0700 Subject: [PATCH] kernel - Fix serious bug in MAP_STACK, deprecate auto-grow semantics * When MAP_STACK is used without MAP_TRYFIXED, the address the kernel determines for the stack was *NOT* being returned to userland. Instead, userland always got only the hint address. * This fixes ruby MAP_STACK use cases and possibly more. * Deprecate MAP_STACK auto-grow semantics. All user mmap() calls with MAP_STACK are now converted to normal MAP_ANON mmaps. The kernel will continue to create an auto-grow stack segment for the primary user stack in exec(), allowing older pthread libraries to continue working, but this feature is deprecated and will be removed in a future release. --- sys/kern/kern_exec.c | 2 +- sys/vm/vm_map.c | 20 ++++++++++---------- sys/vm/vm_map.h | 2 +- sys/vm/vm_mmap.c | 33 ++++++++++++++++++++++++++++++--- 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 91c87f5fdb..d0391e9540 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -847,7 +847,7 @@ exec_new_vmspace(struct image_params *imgp, struct vmspace *vmcopy) * but allow the program to adjust that (the program may desire to * use areas of the stack for executable code). */ - error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz, + error = vm_map_stack(&vmspace->vm_map, &stack_addr, (vm_size_t)maxssiz, 0, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 3e82e0e3ba..5231f8529c 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -3784,7 +3784,7 @@ vmspace_fork_uksmap_entry(vm_map_t old_map, vm_map_t new_map, * No requirements. */ int -vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, +vm_map_stack (vm_map_t map, vm_offset_t *addrbos, vm_size_t max_ssize, int flags, vm_prot_t prot, vm_prot_t max, int cow) { vm_map_entry_t prev_entry; @@ -3808,17 +3808,17 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, * Find space for the mapping */ if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) { - if (vm_map_findspace(map, addrbos, max_ssize, 1, + if (vm_map_findspace(map, *addrbos, max_ssize, 1, flags, &tmpaddr)) { vm_map_unlock(map); vm_map_entry_release(count); return (KERN_NO_SPACE); } - addrbos = tmpaddr; + *addrbos = tmpaddr; } /* If addr is already mapped, no go */ - if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { + if (vm_map_lookup_entry(map, *addrbos, &prev_entry)) { vm_map_unlock(map); vm_map_entry_release(count); return (KERN_NO_SPACE); @@ -3849,7 +3849,7 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, else next = RB_MIN(vm_map_rb_tree, &map->rb_root); - if (next && next->start < addrbos + max_ssize) { + if (next && next->start < *addrbos + max_ssize) { vm_map_unlock(map); vm_map_entry_release(count); return (KERN_NO_SPACE); @@ -3866,8 +3866,8 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, * pass these values here in the insert call. */ rv = vm_map_insert(map, &count, NULL, NULL, - 0, addrbos + max_ssize - init_ssize, - addrbos + max_ssize, + 0, *addrbos + max_ssize - init_ssize, + *addrbos + max_ssize, VM_MAPTYPE_NORMAL, VM_SUBSYS_STACK, prot, max, cow); @@ -3880,11 +3880,11 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, if (prev_entry != NULL) { vm_map_clip_end(map, prev_entry, - addrbos + max_ssize - init_ssize, + *addrbos + max_ssize - init_ssize, &count); } - if (next->end != addrbos + max_ssize || - next->start != addrbos + max_ssize - init_ssize){ + if (next->end != *addrbos + max_ssize || + next->start != *addrbos + max_ssize - init_ssize){ panic ("Bad entry start/end for new stack entry"); } else { next->aux.avail_ssize = max_ssize - init_ssize; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 4c647b8a84..605074f23d 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -657,7 +657,7 @@ int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int, off_t); void vm_map_simplify_entry (vm_map_t, vm_map_entry_t, int *); void vm_init2 (void); int vm_uiomove (vm_map_t, vm_object_t, off_t, int, vm_offset_t, int *); -int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, int, +int vm_map_stack (vm_map_t, vm_offset_t *, vm_size_t, int, vm_prot_t, vm_prot_t, int); int vm_map_growstack (vm_map_t map, vm_offset_t addr); vm_offset_t vmspace_swap_count (struct vmspace *vmspace); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index a0ae334a0c..c00305bc8e 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -400,10 +400,37 @@ int sys_mmap(struct mmap_args *uap) { int error; + int flags = uap->flags; + off_t upos = uap->pos; + + /* + * Work around fairly serious problems with trying to have an + * auto-grow stack segment related to other unrelated calls to + * mmap() potentially getting addresses within such segments. + * + * Our attempt to use TRYFIXED to mediate the problem basically + * failed. For example, rtld-elf uses it to try to optimize + * shlib placement, but could run afoul of this issue. + * + * The only remaining true MAP_STACK we allow is the user stack as + * created by the exec code. All userland MAP_STACK's are converted + * to normal mmap()s right here. + */ + if (flags & MAP_STACK) { + if (uap->fd != -1) + return (EINVAL); + if ((uap->prot & (PROT_READ|PROT_WRITE)) != + (PROT_READ|PROT_WRITE)) { + return (EINVAL); + } + flags &= ~MAP_STACK; + flags |= MAP_ANON; + upos = 0; + } error = kern_mmap(curproc->p_vmspace, uap->addr, uap->len, - uap->prot, uap->flags, - uap->fd, uap->pos, &uap->sysmsg_resultp); + uap->prot, flags, + uap->fd, upos, &uap->sysmsg_resultp); return (error); } @@ -1394,7 +1421,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, VM_MAPTYPE_UKSMAP, VM_SUBSYS_MMAP, prot, maxprot, docow); } else if (flags & MAP_STACK) { - rv = vm_map_stack(map, *addr, size, flags, + rv = vm_map_stack(map, addr, size, flags, prot, maxprot, docow); } else if (flags & MAP_VPAGETABLE) { rv = vm_map_find(map, object, NULL, -- 2.41.0