From a108bf718cc1205ef0755235ee340cb6a9a9053c Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Wed, 27 Aug 2003 01:43:08 +0000
Subject: [PATCH] SLAB ALLOCATOR Stage 1.

This brings in a slab allocator written from scratch by yours truly.
A detailed explanation of the allocator is included, but first, the other
changes:

* Instead of having vm_map_entry_insert*() and friends allocate the
  vm_map_entry structures, a new mechanism has been put in place whereby
  vm_map_entry structures are reserved at a higher level and are then
  expected to exist in the free pool in deep vm_map code.  This
  preliminary implementation may eventually turn into something more
  sophisticated that includes things like pmap entries and so forth.
  The idea is to convert what should be low level routines (VM object
  and map manipulation) back into low level routines.

* vm_map_entry structures are now cached per-cpu, which is integrated
  into the reservation model above.

* The zalloc 'kmapentzone' has been removed.  We now only have
  'mapentzone'.

* There were race conditions between vm_map_findspace() and actually
  entering the map entry with vm_map_insert().  These have been closed
  through the vm_map_entry reservation model described above.

* Two new kernel config options now work.  NO_KMEM_MAP has been fleshed
  out a bit more, and a number of deadlocks related to now having only
  the kernel_map have been fixed.  The USE_SLAB_ALLOCATOR option will
  cause the kernel to compile in the slab allocator instead of the
  original malloc allocator.  If you specify USE_SLAB_ALLOCATOR you must
  also specify NO_KMEM_MAP.

* vm_poff_t and vm_paddr_t integer types have been added.  These are
  meant to represent physical addresses and offsets (physical memory
  might be larger than virtual memory, for example with Intel PAE).
  They are not heavily used yet, but the intention is to separate the
  physical representation from the virtual representation.

SLAB ALLOCATOR FEATURES

The slab allocator breaks allocations up into approximately 80 zones
based on their size.  Each zone has a chunk size (alignment).  For
example, all allocations in the 1-8 byte range allocate in chunks of
8 bytes.  Each size zone is backed by one or more blocks of memory.
The size of these blocks is fixed at ZoneSize, which is calculated at
boot time to be between 32K and 128K.  The use of a fixed block size
allows us to locate the zone header given a memory pointer with a
simple masking operation.

The slab allocator operates on a per-cpu basis.  The cpu that allocates
a zone block owns it.  free() checks the cpu that owns the zone holding
the memory pointer being freed and forwards the request to the
appropriate cpu through an asynchronous IPI.  This request is not
currently optimized, but it can theoretically be heavily optimized
('queued') to the point where the overhead becomes inconsequential.
As of this commit the malloc_type information is not MP safe, but the
core slab allocation and deallocation algorithms, not including the
allocation of the backing block, *ARE* MP safe.  The core code requires
no mutexes or locks, only a critical section.

Each zone contains N allocations of a fixed chunk size.  For example, a
128K zone can hold approximately 16000 8-byte allocations.  The zone is
initially zero'd and new allocations are simply allocated linearly out
of the zone.  When a chunk is freed it is entered into a linked list and
the next allocation request will reuse it.  The slab allocator heavily
optimizes M_ZERO operations at both the page level and the chunk level.
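To illustrate the fixed-block-size trick described above, here is a minimal
userland sketch (not part of the patch): because each zone's backing block
is ZoneSize bytes long and ZoneSize-aligned, with ZoneSize a power of 2, the
in-band zone header can be recovered from any chunk pointer with a single
mask.  The names ZONE_SIZE, struct zone_hdr and ptr_to_zone() are invented
for the sketch; the kernel code uses ZoneSize/ZoneMask and the SLZone header.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define ZONE_SIZE   (128 * 1024)      /* power of 2, stands in for ZoneSize */
    #define ZONE_MASK   (ZONE_SIZE - 1)   /* stands in for ZoneMask */

    struct zone_hdr {                     /* stands in for the in-band SLZone */
        uint32_t magic;                   /* like z_Magic */
        int      owner_cpu;               /* like z_Cpu */
        int      chunk_size;              /* like z_ChunkSize */
    };

    /*
     * Given any pointer that lies inside a zone, masking off the low bits
     * of the address yields the zone header; no lookup table is required.
     */
    static struct zone_hdr *
    ptr_to_zone(void *ptr)
    {
        return (struct zone_hdr *)((uintptr_t)ptr & ~(uintptr_t)ZONE_MASK);
    }

    int
    main(void)
    {
        /* aligned_alloc() stands in for the ZoneSize-aligned backing block */
        struct zone_hdr *z = aligned_alloc(ZONE_SIZE, ZONE_SIZE);
        void *chunk;

        if (z == NULL)
            return (1);
        chunk = (char *)z + 4096;         /* some chunk handed out of the zone */

        z->magic = 0x736c6162;            /* 'slab', like ZALLOC_SLAB_MAGIC */
        z->owner_cpu = 0;
        z->chunk_size = 64;

        printf("chunk %p belongs to zone %p (owner cpu %d)\n",
            chunk, (void *)ptr_to_zone(chunk), ptr_to_zone(chunk)->owner_cpu);
        free(z);
        return (0);
    }

In the patch itself the same recovery appears as
z = (SLZone *)((uintptr_t)ptr & ~(uintptr_t)ZoneMask), and when z->z_Cpu is
not the current cpu, free() forwards the chunk to the owning cpu with
lwkt_send_ipiq() instead of touching the zone locally.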
The slab allocator maintains various undocumented malloc quirks such as ensuring that small power-of-2 allocations are aligned to their size, and malloc(0) requests are also allowed and return a non-NULL result. kern_tty.c depends heavily on the power-of-2 alignment feature and ahc depends on the malloc(0) feature. Eventually we may remove the malloc(0) feature. PROBLEMS AS OF THIS COMMIT NOTE! This commit may destabilize the kernel a bit. There are issues with the ISA DMA area ('bounce' buffer allocation) due to the large backing block size used by the slab allocator and there are probably some deadlock issues do to the removal of kmem_map that have not yet been resolved. --- sys/conf/files | 3 +- sys/conf/options | 3 +- sys/cpu/i386/include/types.h | 12 +- sys/i386/i386/mp_machdep.c | 14 +- sys/i386/include/types.h | 12 +- sys/kern/imgact_aout.c | 17 +- sys/kern/imgact_elf.c | 12 +- sys/kern/kern_malloc.c | 10 +- sys/kern/kern_slaballoc.c | 840 ++++++++++++++++++++++++++++ sys/kern/sys_process.c | 6 +- sys/kern/vfs_bio.c | 16 +- sys/platform/pc32/i386/mp_machdep.c | 14 +- sys/sys/globaldata.h | 19 +- sys/sys/malloc.h | 19 +- sys/sys/slaballoc.h | 98 ++++ sys/vfs/procfs/procfs_mem.c | 6 +- sys/vm/vm_extern.h | 3 +- sys/vm/vm_fault.c | 4 +- sys/vm/vm_init.c | 3 +- sys/vm/vm_kern.c | 79 ++- sys/vm/vm_map.c | 498 +++++++++++------ sys/vm/vm_map.h | 30 +- sys/vm/vm_page.c | 9 +- sys/vm/vm_zone.c | 13 +- sys/vm/vm_zone.h | 3 +- 25 files changed, 1500 insertions(+), 243 deletions(-) create mode 100644 sys/kern/kern_slaballoc.c create mode 100644 sys/sys/slaballoc.h diff --git a/sys/conf/files b/sys/conf/files index 21c4eb99f0..391e6454f0 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.10 2003/08/07 21:16:48 dillon Exp $ +# $DragonFly: src/sys/conf/files,v 1.11 2003/08/27 01:43:05 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -622,6 +622,7 @@ kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_random.c standard kern/kern_resource.c standard +kern/kern_slaballoc.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_subr.c standard diff --git a/sys/conf/options b/sys/conf/options index 1a4da2fa5a..359edcb424 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/options,v 1.191.2.53 2003/06/04 17:56:58 sam Exp $ -# $DragonFly: src/sys/conf/options,v 1.4 2003/08/25 19:50:22 dillon Exp $ +# $DragonFly: src/sys/conf/options,v 1.5 2003/08/27 01:43:05 dillon Exp $ # # On the handling of kernel options # @@ -397,6 +397,7 @@ VFS_BIO_DEBUG opt_global.h # These are VM related options NO_KMEM_MAP opt_global.h +USE_SLAB_ALLOCATOR opt_vm.h VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h diff --git a/sys/cpu/i386/include/types.h b/sys/cpu/i386/include/types.h index d9ad94f2cb..7eb0ac5bb7 100644 --- a/sys/cpu/i386/include/types.h +++ b/sys/cpu/i386/include/types.h @@ -32,7 +32,7 @@ * * @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD: src/sys/i386/include/types.h,v 1.19.2.1 2001/03/21 10:50:58 peter Exp $ - * $DragonFly: src/sys/cpu/i386/include/types.h,v 1.4 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/types.h,v 1.5 2003/08/27 01:43:07 dillon Exp $ */ #ifndef _MACHINE_TYPES_H_ @@ -48,10 +48,12 @@ typedef struct label_t { } label_t; #endif -typedef 
unsigned int vm_offset_t; -typedef __int64_t vm_ooffset_t; -typedef unsigned int vm_pindex_t; -typedef unsigned int vm_size_t; +typedef unsigned int vm_offset_t; /* address space bounded offset */ +typedef unsigned int vm_size_t; /* address space bounded size */ +typedef __int64_t vm_ooffset_t; /* VM object bounded offset */ +typedef unsigned int vm_pindex_t; /* physical page index */ +typedef __uint64_t vm_poff_t; /* physical offset */ +typedef __uint64_t vm_paddr_t; /* physical addr (same as vm_poff_t) */ typedef __int32_t register_t; typedef __uint32_t u_register_t; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 5459af867c..1bb66ae44a 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.15 2003/08/26 21:42:18 rob Exp $ + * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.16 2003/08/27 01:43:07 dillon Exp $ */ #include "opt_cpu.h" @@ -2193,6 +2193,9 @@ install_ap_tramp(u_int boot_addr) * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It ain't pretty, * but it seems to work. + * + * NOTE: eventually an AP gets to ap_init(), which is called just + * before the AP goes into the LWKT scheduler's idle loop. */ static int start_ap(int logical_cpu, u_int boot_addr) @@ -2461,6 +2464,15 @@ ap_init(void) smp_active = 1; /* historic */ } + /* + * AP helper function for kernel memory support. This will create + * a memory reserve for the AP that is necessary to avoid certain + * memory deadlock situations, such as when the kernel_map needs + * a vm_map_entry and zalloc has no free entries and tries to allocate + * a new one from the ... kernel_map :-) + */ + kmem_cpu_init(); + /* * Startup helper thread(s) one per cpu. */ diff --git a/sys/i386/include/types.h b/sys/i386/include/types.h index dcfd6c9bda..81ffca2bd3 100644 --- a/sys/i386/include/types.h +++ b/sys/i386/include/types.h @@ -32,7 +32,7 @@ * * @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD: src/sys/i386/include/types.h,v 1.19.2.1 2001/03/21 10:50:58 peter Exp $ - * $DragonFly: src/sys/i386/include/Attic/types.h,v 1.4 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/types.h,v 1.5 2003/08/27 01:43:07 dillon Exp $ */ #ifndef _MACHINE_TYPES_H_ @@ -48,10 +48,12 @@ typedef struct label_t { } label_t; #endif -typedef unsigned int vm_offset_t; -typedef __int64_t vm_ooffset_t; -typedef unsigned int vm_pindex_t; -typedef unsigned int vm_size_t; +typedef unsigned int vm_offset_t; /* address space bounded offset */ +typedef unsigned int vm_size_t; /* address space bounded size */ +typedef __int64_t vm_ooffset_t; /* VM object bounded offset */ +typedef unsigned int vm_pindex_t; /* physical page index */ +typedef __uint64_t vm_poff_t; /* physical offset */ +typedef __uint64_t vm_paddr_t; /* physical addr (same as vm_poff_t) */ typedef __int32_t register_t; typedef __uint32_t u_register_t; diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 9388097637..4d3f2a8218 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/kern/imgact_aout.c,v 1.59.2.5 2001/11/03 01:41:08 ps Exp $ - * $DragonFly: src/sys/kern/imgact_aout.c,v 1.5 2003/08/26 21:09:02 rob Exp $ + * $DragonFly: src/sys/kern/imgact_aout.c,v 1.6 2003/08/27 01:43:07 dillon Exp $ */ #include @@ -77,12 +77,12 @@ struct sysentvec aout_sysvec = { }; static int -exec_aout_imgact(imgp) - struct image_params *imgp; +exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out = (const struct exec *) imgp->image_header; struct vmspace *vmspace; struct vnode *vp; + int count; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; @@ -182,44 +182,49 @@ exec_aout_imgact(imgp) vp = imgp->vp; map = &vmspace->vm_map; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VOP_GETVOBJECT(vp, &object); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; - error = vm_map_insert(map, object, + error = vm_map_insert(map, &count, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); + vm_map_entry_release(count); return (error); } data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); - error = vm_map_insert(map, object, + error = vm_map_insert(map, &count, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); + vm_map_entry_release(count); return (error); } } if (bss_size) { - error = vm_map_insert(map, NULL, 0, + error = vm_map_insert(map, &count, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); + vm_map_entry_release(count); return (error); } } vm_map_unlock(map); + vm_map_entry_release(count); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index bd3d950804..9aac0a15bb 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -27,7 +27,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $ - * $DragonFly: src/sys/kern/imgact_elf.c,v 1.7 2003/08/26 21:09:02 rob Exp $ + * $DragonFly: src/sys/kern/imgact_elf.c,v 1.8 2003/08/27 01:43:07 dillon Exp $ */ #include @@ -190,6 +190,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o size_t map_len; vm_offset_t map_addr; int error, rv, cow; + int count; size_t copy_len; vm_object_t object; vm_offset_t file_addr; @@ -234,8 +235,9 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(&vmspace->vm_map); - rv = vm_map_insert(&vmspace->vm_map, + rv = vm_map_insert(&vmspace->vm_map, &count, object, file_addr, /* file offset */ map_addr, /* virtual start */ @@ -244,6 +246,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o VM_PROT_ALL, cow); vm_map_unlock(&vmspace->vm_map); + vm_map_entry_release(count); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return EINVAL; @@ -268,11 +271,14 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o /* This had damn well better be true! 
*/ if (map_len != 0) { + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(&vmspace->vm_map); - rv = vm_map_insert(&vmspace->vm_map, NULL, 0, + rv = vm_map_insert(&vmspace->vm_map, &count, + NULL, 0, map_addr, map_addr + map_len, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(&vmspace->vm_map); + vm_map_entry_release(count); if (rv != KERN_SUCCESS) { return EINVAL; } diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 9925e17481..daef379a1e 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -32,11 +32,13 @@ * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 * $FreeBSD: src/sys/kern/kern_malloc.c,v 1.64.2.5 2002/03/16 02:19:51 archie Exp $ - * $DragonFly: src/sys/kern/Attic/kern_malloc.c,v 1.11 2003/08/26 21:09:02 rob Exp $ + * $DragonFly: src/sys/kern/Attic/kern_malloc.c,v 1.12 2003/08/27 01:43:07 dillon Exp $ */ #include "opt_vm.h" +#if !defined(USE_SLAB_ALLOCATOR) + #include #include #include @@ -587,7 +589,7 @@ malloc_init(data) { struct malloc_type *type = (struct malloc_type *)data; #if defined(NO_KMEM_MAP) - uintptr_t limsize; + vm_poff_t limsize; #endif if (type->ks_magic != M_MAGIC) @@ -604,7 +606,7 @@ malloc_init(data) * memory or 1/10 of our KVA space, whichever is lower. */ #if defined(NO_KMEM_MAP) - limsize = (uintptr_t)vmstats.v_page_count * PAGE_SIZE; + limsize = (vm_poff_t)vmstats.v_page_count * PAGE_SIZE; if (limsize > VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) limsize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; type->ks_limit = limsize / 10; @@ -668,3 +670,5 @@ malloc_uninit(data) type->ks_next = NULL; type->ks_limit = 0; } + +#endif diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c new file mode 100644 index 0000000000..ca1f7f9ee6 --- /dev/null +++ b/sys/kern/kern_slaballoc.c @@ -0,0 +1,840 @@ +/* + * KERN_SLABALLOC.C - Kernel SLAB memory allocator + * + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/kern/kern_slaballoc.c,v 1.1 2003/08/27 01:43:07 dillon Exp $ + * + * This module implements a slab allocator drop-in replacement for the + * kernel malloc(). + * + * A slab allocator reserves a ZONE for each chunk size, then lays the + * chunks out in an array within the zone. 
Allocation and deallocation + * is nearly instantanious, and fragmentation/overhead losses are limited + * to a fixed worst-case amount. + * + * The downside of this slab implementation is in the chunk size + * multiplied by the number of zones. ~80 zones * 128K = 10MB of VM per cpu. + * In a kernel implementation all this memory will be physical so + * the zone size is adjusted downward on machines with less physical + * memory. The upside is that overhead is bounded... this is the *worst* + * case overhead. + * + * Slab management is done on a per-cpu basis and no locking or mutexes + * are required, only a critical section. When one cpu frees memory + * belonging to another cpu's slab manager an asynchronous IPI message + * will be queued to execute the operation. In addition, both the + * high level slab allocator and the low level zone allocator optimize + * M_ZERO requests, and the slab allocator does not have to pre initialize + * the linked list of chunks. + * + * XXX Balancing is needed between cpus. Balance will be handled through + * asynchronous IPIs primarily by reassigning the z_Cpu ownership of chunks. + * + * XXX If we have to allocate a new zone and M_USE_RESERVE is set, use of + * the new zone should be restricted to M_USE_RESERVE requests only. + * + * Alloc Size Chunking Number of zones + * 0-127 8 16 + * 128-255 16 8 + * 256-511 32 8 + * 512-1023 64 8 + * 1024-2047 128 8 + * 2048-4095 256 8 + * 4096-8191 512 8 + * 8192-16383 1024 8 + * 16384-32767 2048 8 + * (if PAGE_SIZE is 4K the maximum zone allocation is 16383) + * + * Allocations >= ZALLOC_ZONE_LIMIT go directly to kmem. + * + * API REQUIREMENTS AND SIDE EFFECTS + * + * To operate as a drop-in replacement to the FreeBSD-4.x malloc() we + * have remained compatible with the following API requirements: + * + * + small power-of-2 sized allocations are power-of-2 aligned (kern_tty) + * + malloc(0) is allowed and returns non-NULL (ahc driver) + * + ability to allocate arbitrarily large chunks of memory + */ + +#include "opt_vm.h" + +#if defined(USE_SLAB_ALLOCATOR) + +#if !defined(NO_KMEM_MAP) +#error "NO_KMEM_MAP must be defined when USE_SLAB_ALLOCATOR is defined" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) + +/* + * Fixed globals (not per-cpu) + */ +static int ZoneSize; +static int ZonePageCount; +static int ZonePageLimit; +static int ZoneMask; +static struct malloc_type *kmemstatistics; +static struct kmemusage *kmemusage; +static int32_t weirdary[16]; + +static void *kmem_slab_alloc(vm_size_t bytes, vm_offset_t align, int flags); +static void kmem_slab_free(void *ptr, vm_size_t bytes); + +/* + * Misc constants. Note that allocations that are exact multiples of + * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. + * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. + */ +#define MIN_CHUNK_SIZE 8 /* in bytes */ +#define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) +#define ZONE_RELS_THRESH 2 /* threshold number of zones */ +#define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) + +/* + * The WEIRD_ADDR is used as known text to copy into free objects to + * try to create deterministic failure cases if the data is accessed after + * free. 
+ */ +#define WEIRD_ADDR 0xdeadc0de +#define MAX_COPY sizeof(weirdary) +#define ZERO_LENGTH_PTR ((void *)-8) + +/* + * Misc global malloc buckets + */ + +MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches"); +MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); +MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); + +MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); +MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery"); + +/* + * Initialize the slab memory allocator. We have to choose a zone size based + * on available physical memory. We choose a zone side which is approximately + * 1/1024th of our memory, so if we have 128MB of ram we have a zone size of + * 128K. The zone size is limited to the bounds set in slaballoc.h + * (typically 32K min, 128K max). + */ +static void kmeminit(void *dummy); + +SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL) + +static void +kmeminit(void *dummy) +{ + vm_poff_t limsize; + int usesize; + int i; + vm_pindex_t npg; + + limsize = (vm_poff_t)vmstats.v_page_count * PAGE_SIZE; + if (limsize > VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + limsize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + + usesize = (int)(limsize / 1024); /* convert to KB */ + + ZoneSize = ZALLOC_MIN_ZONE_SIZE; + while (ZoneSize < ZALLOC_MAX_ZONE_SIZE && (ZoneSize << 1) < usesize) + ZoneSize <<= 1; + ZoneMask = ZoneSize - 1; + ZonePageLimit = PAGE_SIZE * 4; + ZonePageCount = ZoneSize / PAGE_SIZE; + + npg = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE; + kmemusage = kmem_slab_alloc(npg * sizeof(struct kmemusage), PAGE_SIZE, M_ZERO); + + for (i = 0; i < arysize(weirdary); ++i) + weirdary[i] = WEIRD_ADDR; + + if (bootverbose) + printf("Slab ZoneSize set to %dKB\n", ZoneSize / 1024); +} + +/* + * Initialize a malloc type tracking structure. NOTE! counters and such + * need to be made per-cpu (maybe with a MAXCPU array). + */ +void +malloc_init(void *data) +{ + struct malloc_type *type = data; + vm_poff_t limsize; + + if (type->ks_magic != M_MAGIC) + panic("malloc type lacks magic"); + + if (type->ks_limit != 0) + return; + + if (vmstats.v_page_count == 0) + panic("malloc_init not allowed before vm init"); + + limsize = (vm_poff_t)vmstats.v_page_count * PAGE_SIZE; + if (limsize > VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + limsize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + type->ks_limit = limsize / 10; + + type->ks_next = kmemstatistics; + kmemstatistics = type; +} + +void +malloc_uninit(void *data) +{ + struct malloc_type *type = data; + struct malloc_type *t; + + if (type->ks_magic != M_MAGIC) + panic("malloc type lacks magic"); + + if (vmstats.v_page_count == 0) + panic("malloc_uninit not allowed before vm init"); + + if (type->ks_limit == 0) + panic("malloc_uninit on uninitialized type"); + +#ifdef INVARIANTS + if (type->ks_memuse != 0) { + printf("malloc_uninit: %ld bytes of '%s' still allocated\n", + type->ks_memuse, type->ks_shortdesc); + } +#endif + if (type == kmemstatistics) { + kmemstatistics = type->ks_next; + } else { + for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) { + if (t->ks_next == type) { + t->ks_next = type->ks_next; + break; + } + } + } + type->ks_next = NULL; + type->ks_limit = 0; +} + +/* + * Calculate the zone index for the allocation request size and set the + * allocation request size to that particular zone's chunk size. 
+ */ +static __inline int +zoneindex(unsigned long *bytes) +{ + unsigned int n = (unsigned int)*bytes; /* unsigned for shift opt */ + if (n < 128) { + *bytes = n = (n + 7) & ~7; + return(n / 8 - 1); /* 8 byte chunks, 16 zones */ + } + if (n < 256) { + *bytes = n = (n + 15) & ~15; + return(n / 16 + 7); + } + if (n < 8192) { + if (n < 512) { + *bytes = n = (n + 31) & ~31; + return(n / 32 + 15); + } + if (n < 1024) { + *bytes = n = (n + 63) & ~63; + return(n / 64 + 23); + } + if (n < 2048) { + *bytes = n = (n + 127) & ~127; + return(n / 128 + 31); + } + if (n < 4096) { + *bytes = n = (n + 255) & ~255; + return(n / 256 + 39); + } + *bytes = n = (n + 511) & ~511; + return(n / 512 + 47); + } +#if ZALLOC_ZONE_LIMIT > 8192 + if (n < 16384) { + *bytes = n = (n + 1023) & ~1023; + return(n / 1024 + 55); + } +#endif +#if ZALLOC_ZONE_LIMIT > 16384 + if (n < 32768) { + *bytes = n = (n + 2047) & ~2047; + return(n / 2048 + 63); + } +#endif + panic("Unexpected byte count %d", n); + return(0); +} + +/* + * malloc() (SLAB ALLOCATOR) + * + * Allocate memory via the slab allocator. If the request is too large, + * or if it page-aligned beyond a certain size, we fall back to the + * KMEM subsystem. A SLAB tracking descriptor must be specified, use + * &SlabMisc if you don't care. + * + * M_NOWAIT - return NULL instead of blocking. + * M_ZERO - zero the returned memory. + * M_USE_RESERVE - allocate out of the system reserve if necessary + */ +void * +malloc(unsigned long size, struct malloc_type *type, int flags) +{ + SLZone *z; + SLChunk *chunk; + SLGlobalData *slgd; + int zi; + + slgd = &mycpu->gd_slab; + + /* + * XXX silly to have this in the critical path. + */ + if (type->ks_limit == 0) { + crit_enter(); + if (type->ks_limit == 0) + malloc_init(type); + crit_exit(); + } + ++type->ks_calls; + + /* + * Handle the case where the limit is reached. Panic if can't return + * NULL. XXX the original malloc code looped, but this tended to + * simply deadlock the computer. + */ + while (type->ks_memuse >= type->ks_limit) { + if (flags & (M_NOWAIT|M_NULLOK)) + return(NULL); + panic("%s: malloc limit exceeded", type->ks_shortdesc); + } + + /* + * Handle the degenerate size == 0 case. Yes, this does happen. + * Return a special pointer. This is to maintain compatibility with + * the original malloc implementation. Certain devices, such as the + * adaptec driver, not only allocate 0 bytes, they check for NULL and + * also realloc() later on. Joy. + */ + if (size == 0) + return(ZERO_LENGTH_PTR); + + /* + * Handle large allocations directly. There should not be very many of + * these so performance is not a big issue. + * + * Guarentee page alignment for allocations in multiples of PAGE_SIZE + */ + if (size >= ZALLOC_ZONE_LIMIT || (size & PAGE_MASK) == 0) { + struct kmemusage *kup; + + size = round_page(size); + chunk = kmem_slab_alloc(size, PAGE_SIZE, flags); + if (chunk == NULL) + return(NULL); + flags &= ~M_ZERO; /* result already zero'd if M_ZERO was set */ + kup = btokup(chunk); + kup->ku_pagecnt = size / PAGE_SIZE; + crit_enter(); + goto done; + } + + /* + * Attempt to allocate out of an existing zone. First try the free list, + * then allocate out of unallocated space. If we find a good zone move + * it to the head of the list so later allocations find it quickly + * (we might have thousands of zones in the list). + * + * Note: zoneindex() will panic of size is too large. 
+ */ + zi = zoneindex(&size); + KKASSERT(zi < NZONES); + crit_enter(); + if ((z = slgd->ZoneAry[zi]) != NULL) { + KKASSERT(z->z_NFree > 0); + + /* + * Remove us from the ZoneAry[] when we become empty + */ + if (--z->z_NFree == 0) { + slgd->ZoneAry[zi] = z->z_Next; + z->z_Next = NULL; + } + + /* + * Locate a chunk in a free page. This attempts to localize + * reallocations into earlier pages without us having to sort + * the chunk list. A chunk may still overlap a page boundary. + */ + while (z->z_FirstFreePg < ZonePageCount) { + if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { +#ifdef DIAGNOSTIC + /* + * Diagnostic: c_Next is not total garbage. + */ + KKASSERT(chunk->c_Next == NULL || + ((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) == + ((intptr_t)chunk & IN_SAME_PAGE_MASK)); +#endif + if ((uintptr_t)chunk < 0xC0000000U) + panic("chunk %p FFPG %d/%d", chunk, z->z_FirstFreePg, ZonePageCount); + if (chunk->c_Next && (uintptr_t)chunk->c_Next < 0xC0000000U) + panic("chunkNEXT %p %p FFPG %d/%d", chunk, chunk->c_Next, z->z_FirstFreePg, ZonePageCount); + z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; + goto done; + } + ++z->z_FirstFreePg; + } + + /* + * Never before used memory is available at the UAlloc. This + * memory has already been zero'd. + */ + chunk = (SLChunk *)((char *)z + z->z_UAlloc); + z->z_UAlloc += size; + KKASSERT(z->z_UAlloc <= ZoneSize); + flags &= ~M_ZERO; + goto done; + } + + /* + * If all zones are exhausted we need to allocate a new zone for this + * index. Use M_ZERO to take advantage of pre-zerod pages. Also see + * UAlloc use above in regards to M_ZERO. + * + * At least one subsystem, the tty code (see CROUND) expects power-of-2 + * allocations to be power-of-2 aligned. We maintain compatibility by + * adjusting the base offset below. + */ + { + int off; + + if ((z = slgd->FreeZones) != NULL) { + slgd->FreeZones = z->z_Next; + --slgd->NFreeZones; + bzero(z, sizeof(SLZone)); + } else { + z = kmem_slab_alloc(ZoneSize, ZoneSize, flags|M_ZERO); + if (z == NULL) + goto fail; + } + + /* + * Guarentee power-of-2 alignment for power-of-2-sized chunks. + * Otherwise just 8-byte align the data. + */ + if ((size | (size - 1)) + 1 == (size << 1)) + off = (sizeof(SLZone) + size - 1) & ~(size - 1); + else + off = (sizeof(SLZone) + MIN_CHUNK_MASK) & ~MIN_CHUNK_MASK; + z->z_Magic = ZALLOC_SLAB_MAGIC; + z->z_ZoneIndex = zi; + z->z_NMax = (ZoneSize - off) / size; + z->z_NFree = z->z_NMax - 1; + z->z_UAlloc = off + size; + z->z_ChunkSize = size; + z->z_FirstFreePg = ZonePageCount; + z->z_Cpu = mycpu->gd_cpuid; + chunk = (SLChunk *)((char *)z + off); + z->z_Next = slgd->ZoneAry[zi]; + slgd->ZoneAry[zi] = z; + flags &= ~M_ZERO; /* already zero'd */ + } +done: + crit_exit(); + if (flags & M_ZERO) + bzero(chunk, size); + ++type->ks_inuse; + type->ks_memuse += size; + return(chunk); +fail: + crit_exit(); + return(NULL); +} + +void * +realloc(void *ptr, unsigned long size, struct malloc_type *type, int flags) +{ + SLZone *z; + void *nptr; + unsigned long osize; + + if (ptr == NULL || ptr == ZERO_LENGTH_PTR) + return(malloc(size, type, flags)); + if (size == 0) { + free(ptr, type); + return(NULL); + } + + /* + * Handle oversized allocations. XXX we really should require that a + * size be passed to free() instead of this nonsense. 
+ */ + { + struct kmemusage *kup; + + kup = btokup(ptr); + if (kup->ku_pagecnt) { + osize = kup->ku_pagecnt << PAGE_SHIFT; + if (osize == round_page(size)) + return(ptr); + if ((nptr = malloc(size, type, flags)) == NULL) + return(NULL); + bcopy(ptr, nptr, min(size, osize)); + free(ptr, type); + return(nptr); + } + } + + /* + * Get the original allocation's zone. If the new request winds up + * using the same chunk size we do not have to do anything. + */ + z = (SLZone *)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); + KKASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); + + zoneindex(&size); + if (z->z_ChunkSize == size) + return(ptr); + + /* + * Allocate memory for the new request size. Note that zoneindex has + * already adjusted the request size to the appropriate chunk size, which + * should optimize our bcopy(). Then copy and return the new pointer. + */ + if ((nptr = malloc(size, type, flags)) == NULL) + return(NULL); + bcopy(ptr, nptr, min(size, z->z_ChunkSize)); + free(ptr, type); + return(nptr); +} + +/* + * free() (SLAB ALLOCATOR) + * + * Free the specified chunk of memory. The byte count is not strictly + * required but if DIAGNOSTIC is set we use it as a sanity check. + */ +static +void +free_remote(void *ptr) +{ + free(ptr, *(struct malloc_type **)ptr); +} + +void +free(void *ptr, struct malloc_type *type) +{ + SLZone *z; + SLChunk *chunk; + SLGlobalData *slgd; + int pgno; + + slgd = &mycpu->gd_slab; + + /* + * Handle special 0-byte allocations + */ + if (ptr == ZERO_LENGTH_PTR) + return; + + /* + * Handle oversized allocations. XXX we really should require that a + * size be passed to free() instead of this nonsense. + */ + { + struct kmemusage *kup; + unsigned long size; + + kup = btokup(ptr); + if (kup->ku_pagecnt) { + size = kup->ku_pagecnt << PAGE_SHIFT; + kup->ku_pagecnt = 0; + --type->ks_inuse; + type->ks_memuse -= size; +#ifdef INVARIANTS + KKASSERT(sizeof(weirdary) <= size); + bcopy(weirdary, ptr, sizeof(weirdary)); +#endif + kmem_slab_free(ptr, size); /* may block */ + return; + } + } + + /* + * Zone case. Figure out the zone based on the fact that it is + * ZoneSize aligned. + */ + z = (SLZone *)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); + KKASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); + + /* + * If we do not own the zone then forward the request to the + * cpu that does. The freeing code does not need the byte count + * unless DIAGNOSTIC is set. + */ + if (z->z_Cpu != mycpu->gd_cpuid) { + *(struct malloc_type **)ptr = type; + lwkt_send_ipiq(z->z_Cpu, free_remote, ptr); + return; + } + + if (type->ks_magic != M_MAGIC) + panic("free: malloc type lacks magic"); + + crit_enter(); + pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; + chunk = ptr; + +#ifdef DIAGNOSTIC + /* + * Diagnostic: attempt to detect a double-free (not perfect). + */ + if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) { + SLChunk *scan; + for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) { + if (scan == chunk) + panic("Double free at %p", chunk); + } + } +#endif + + /* + * Put weird data into the memory to detect modifications after freeing, + * illegal pointer use after freeing (we should fault on the odd address), + * and so forth. XXX needs more work, see the old malloc code. + */ +#ifdef INVARIANTS + if (z->z_ChunkSize < sizeof(weirdary)) + bcopy(weirdary, chunk, z->z_ChunkSize); + else + bcopy(weirdary, chunk, sizeof(weirdary)); +#endif + + /* + * Add this free non-zero'd chunk to a linked list for reuse, adjust + * z_FirstFreePg. 
+ */ + if ((uintptr_t)chunk < 0xC0000000U) + panic("BADFREE %p\n", chunk); +#if 0 + if (type->ks_inuse == 34 && type->ks_memuse == 600 && (uint32_t)ptr == 0xc11600b8) + Debugger("Freeing"); +#endif + chunk->c_Next = z->z_PageAry[pgno]; + z->z_PageAry[pgno] = chunk; + if (chunk->c_Next && (uintptr_t)chunk->c_Next < 0xC0000000U) + panic("BADFREE2"); + if (z->z_FirstFreePg > pgno) + z->z_FirstFreePg = pgno; + + /* + * Bump the number of free chunks. If it becomes non-zero the zone + * must be added back onto the appropriate list. + */ + if (z->z_NFree++ == 0) { + z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; + slgd->ZoneAry[z->z_ZoneIndex] = z; + } + + --type->ks_inuse; + type->ks_memuse -= z->z_ChunkSize; + + /* + * If the zone becomes totally free, and there are other zones we + * can allocate from, move this zone to the FreeZones list. Implement + * hysteresis on the FreeZones list to improve performance. + * + * XXX try not to block on the kernel_map lock. + */ + if (z->z_NFree == z->z_NMax && + (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z) + ) { + SLZone **pz; + + for (pz = &slgd->ZoneAry[z->z_ZoneIndex]; z != *pz; pz = &(*pz)->z_Next) + ; + *pz = z->z_Next; + z->z_Magic = -1; + if (slgd->NFreeZones == ZONE_RELS_THRESH && + lockstatus(&kernel_map->lock, NULL) == 0) { + SLZone *oz; + + z->z_Next = slgd->FreeZones->z_Next; + oz = slgd->FreeZones; + slgd->FreeZones = z; + kmem_slab_free(oz, ZoneSize); /* may block */ + } else { + z->z_Next = slgd->FreeZones; + slgd->FreeZones = z; + ++slgd->NFreeZones; + } + } + crit_exit(); +} + +/* + * kmem_slab_alloc() + * + * Directly allocate and wire kernel memory in PAGE_SIZE chunks with the + * specified alignment. M_* flags are expected in the flags field. + * + * Alignment must be a multiple of PAGE_SIZE. + * + * NOTE! XXX For the moment we use vm_map_entry_reserve/release(), + * but when we move zalloc() over to use this function as its backend + * we will have to switch to kreserve/krelease and call reserve(0) + * after the new space is made available. + */ +static void * +kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags) +{ + vm_size_t i; + vm_offset_t addr; + vm_offset_t offset; + int count; + vm_map_t map = kernel_map; + + size = round_page(size); + addr = vm_map_min(map); + + /* + * Reserve properly aligned space from kernel_map + */ + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); + crit_enter(); + vm_map_lock(map); + if (vm_map_findspace(map, vm_map_min(map), size, align, &addr)) { + vm_map_unlock(map); + if ((flags & (M_NOWAIT|M_NULLOK)) == 0) + panic("kmem_slab_alloc(): kernel_map ran out of space!"); + crit_exit(); + vm_map_entry_release(count); + return(NULL); + } + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(kernel_object); + vm_map_insert(map, &count, + kernel_object, offset, addr, addr + size, + VM_PROT_ALL, VM_PROT_ALL, 0); + + /* + * Allocate the pages. Do not mess with the PG_ZERO flag yet. + */ + for (i = 0; i < size; i += PAGE_SIZE) { + vm_page_t m; + vm_pindex_t idx = OFF_TO_IDX(offset + i); + int zero = (flags & M_ZERO) ? 
VM_ALLOC_ZERO : 0; + + if ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) + m = vm_page_alloc(kernel_object, idx, VM_ALLOC_INTERRUPT|zero); + else + m = vm_page_alloc(kernel_object, idx, VM_ALLOC_SYSTEM|zero); + if (m == NULL) { + if ((flags & M_NOWAIT) == 0) { + vm_map_unlock(map); + vm_wait(); + vm_map_lock(map); + i -= PAGE_SIZE; /* retry */ + continue; + } + while (i != 0) { + i -= PAGE_SIZE; + m = vm_page_lookup(kernel_object, OFF_TO_IDX(offset + i)); + vm_page_free(m); + } + vm_map_delete(map, addr, addr + size, &count); + vm_map_unlock(map); + crit_exit(); + vm_map_entry_release(count); + return(NULL); + } + } + + /* + * Mark the map entry as non-pageable using a routine that allows us to + * populate the underlying pages. + */ + vm_map_set_wired_quick(map, addr, size, &count); + crit_exit(); + + /* + * Enter the pages into the pmap and deal with PG_ZERO and M_ZERO. + */ + for (i = 0; i < size; i += PAGE_SIZE) { + vm_page_t m; + + m = vm_page_lookup(kernel_object, OFF_TO_IDX(offset + i)); + m->valid = VM_PAGE_BITS_ALL; + vm_page_wire(m); + vm_page_wakeup(m); + pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL, 1); + if ((m->flags & PG_ZERO) == 0 && (flags & M_ZERO)) + bzero((char *)addr + i, PAGE_SIZE); + vm_page_flag_clear(m, PG_ZERO); + vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED); + } + vm_map_unlock(map); + vm_map_entry_release(count); + return((void *)addr); +} + +static void +kmem_slab_free(void *ptr, vm_size_t size) +{ + crit_enter(); + vm_map_remove(kernel_map, (vm_offset_t)ptr, (vm_offset_t)ptr + size); + crit_exit(); +} + +#endif diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 43474e992c..a80f7ed378 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -29,7 +29,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/sys_process.c,v 1.51.2.6 2003/01/08 03:06:45 kan Exp $ - * $DragonFly: src/sys/kern/sys_process.c,v 1.11 2003/08/11 17:07:30 drhodus Exp $ + * $DragonFly: src/sys/kern/sys_process.c,v 1.12 2003/08/27 01:43:07 dillon Exp $ */ #include @@ -79,7 +79,7 @@ pread (struct proc *procp, unsigned int addr, unsigned int *retval) { if (rv != KERN_SUCCESS) return EINVAL; - vm_map_lookup_done (tmap, out_entry); + vm_map_lookup_done (tmap, out_entry, 0); /* Find space in kernel_map for the page we're interested in */ rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex), @@ -157,7 +157,7 @@ pwrite (struct proc *procp, unsigned int addr, unsigned int datum) { * Okay, we've got the page. Let's release tmap. */ - vm_map_lookup_done (tmap, out_entry); + vm_map_lookup_done (tmap, out_entry, 0); /* * Fault the page in... diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 0c4a29d5b6..1da5a43c0b 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -12,7 +12,7 @@ * John S. Dyson. 
* * $FreeBSD: src/sys/kern/vfs_bio.c,v 1.242.2.20 2003/05/28 18:38:10 alc Exp $ - * $DragonFly: src/sys/kern/vfs_bio.c,v 1.13 2003/08/26 21:09:02 rob Exp $ + * $DragonFly: src/sys/kern/vfs_bio.c,v 1.14 2003/08/27 01:43:07 dillon Exp $ */ /* @@ -442,15 +442,20 @@ bufinit(void) static void bfreekva(struct buf * bp) { + int count; + if (bp->b_kvasize) { ++buffreekvacnt; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(buffer_map); bufspace -= bp->b_kvasize; vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, - (vm_offset_t) bp->b_kvabase + bp->b_kvasize + (vm_offset_t) bp->b_kvabase + bp->b_kvasize, + &count ); vm_map_unlock(buffer_map); + vm_map_entry_release(count); bp->b_kvasize = 0; bufspacewakeup(); } @@ -1707,9 +1712,11 @@ restart: if (maxsize != bp->b_kvasize) { vm_offset_t addr = 0; + int count; bfreekva(bp); + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(buffer_map); if (vm_map_findspace(buffer_map, @@ -1720,6 +1727,7 @@ restart: * must defragment the map. */ vm_map_unlock(buffer_map); + vm_map_entry_release(count); ++bufdefragcnt; defrag = 1; bp->b_flags |= B_INVAL; @@ -1727,7 +1735,8 @@ restart: goto restart; } if (addr) { - vm_map_insert(buffer_map, NULL, 0, + vm_map_insert(buffer_map, &count, + NULL, 0, addr, addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); @@ -1737,6 +1746,7 @@ restart: ++bufreusecnt; } vm_map_unlock(buffer_map); + vm_map_entry_release(count); } bp->b_data = bp->b_kvabase; } diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index a8c74c550e..d1dc553190 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.15 2003/08/26 21:42:18 rob Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.16 2003/08/27 01:43:07 dillon Exp $ */ #include "opt_cpu.h" @@ -2193,6 +2193,9 @@ install_ap_tramp(u_int boot_addr) * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It ain't pretty, * but it seems to work. + * + * NOTE: eventually an AP gets to ap_init(), which is called just + * before the AP goes into the LWKT scheduler's idle loop. */ static int start_ap(int logical_cpu, u_int boot_addr) @@ -2461,6 +2464,15 @@ ap_init(void) smp_active = 1; /* historic */ } + /* + * AP helper function for kernel memory support. This will create + * a memory reserve for the AP that is necessary to avoid certain + * memory deadlock situations, such as when the kernel_map needs + * a vm_map_entry and zalloc has no free entries and tries to allocate + * a new one from the ... kernel_map :-) + */ + kmem_cpu_init(); + /* * Startup helper thread(s) one per cpu. */ diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h index 7a08161af2..00eda3d995 100644 --- a/sys/sys/globaldata.h +++ b/sys/sys/globaldata.h @@ -24,12 +24,14 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/sys/globaldata.h,v 1.15 2003/08/12 02:36:15 dillon Exp $ + * $DragonFly: src/sys/sys/globaldata.h,v 1.16 2003/08/27 01:43:07 dillon Exp $ */ #ifndef _SYS_GLOBALDATA_H_ #define _SYS_GLOBALDATA_H_ +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + #ifndef _SYS_TIME_H_ #include /* struct timeval */ #endif @@ -39,6 +41,9 @@ #ifndef _SYS_THREAD_H_ #include /* struct thread */ #endif +#ifndef _SYS_SLABALLOC_H_ +#include /* SLGlobalData */ +#endif /* * This structure maps out the global data that needs to be kept on a @@ -60,10 +65,16 @@ * further checks are necessary. Interrupts are typically managed on a * per-processor basis at least until you leave a critical section, but * may then be scheduled to other cpus. + * + * gd_vme_avail and gd_vme_base cache free vm_map_entry structures for use + * in various vm_map related operations. gd_vme_avail is *NOT* a count of + * the number of structures in the cache but is instead a count of the number + * of unreserved structures in the cache. See vm_map_entry_reserve(). */ union sysmsg; struct privatespace; +struct vm_map_entry; struct globaldata { struct privatespace *gd_prvspace; /* self-reference */ @@ -85,6 +96,10 @@ struct globaldata { struct lwkt_ipiq *gd_ipiq; struct thread gd_schedthread; struct thread gd_idlethread; + SLGlobalData gd_slab; /* slab allocator */ + int gd_vme_kdeficit; /* vm_map_entry reservation */ + int gd_vme_avail; /* vm_map_entry reservation */ + struct vm_map_entry *gd_vme_base; /* vm_map_entry reservation */ /* extended by */ }; @@ -103,6 +118,8 @@ typedef struct globaldata *globaldata_t; #define RQF_AST_RESCHED (1 << RQB_AST_RESCHED) #define RQF_AST_MASK (RQF_AST_OWEUPC|RQF_AST_SIGNAL|RQF_AST_RESCHED) +#endif + #ifdef _KERNEL struct globaldata *globaldata_find(int cpu); #endif diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 0fb6ddb9a6..31f9638655 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -32,18 +32,24 @@ * * @(#)malloc.h 8.5 (Berkeley) 5/3/95 * $FreeBSD: src/sys/sys/malloc.h,v 1.48.2.2 2002/03/16 02:19:16 archie Exp $ - * $DragonFly: src/sys/sys/malloc.h,v 1.6 2003/08/25 19:50:33 dillon Exp $ + * $DragonFly: src/sys/sys/malloc.h,v 1.7 2003/08/27 01:43:07 dillon Exp $ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ +#ifdef _KERNEL + #ifndef _MACHINE_VMPARAM_H_ #include /* for VM_MIN_KERNEL_ADDRESS */ #endif #define splmem splhigh +#endif + +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + /* * flags to malloc. 
*/ @@ -51,6 +57,7 @@ #define M_WAITOK 0x0002 /* wait for resources */ #define M_ZERO 0x0100 /* bzero() the allocation */ #define M_USE_RESERVE 0x0200 /* can alloc out of reserve memory */ +#define M_NULLOK 0x0400 /* ok to return NULL in M_WAITOK case */ #define M_MAGIC 877983977 /* time when first defined :-) */ @@ -68,6 +75,8 @@ struct malloc_type { u_short ks_mapblocks; /* number of times blocked for kernel map */ }; +#endif + #ifdef _KERNEL #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1] = { \ @@ -87,6 +96,8 @@ MALLOC_DECLARE(M_IP6OPT); /* for INET6 */ MALLOC_DECLARE(M_IP6NDP); /* for INET6 */ #endif /* _KERNEL */ +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + /* * Array of descriptors that describe the contents of each page */ @@ -114,7 +125,6 @@ struct kmembuckets { long kb_couldfree; /* over high water mark and could free */ }; -#ifdef _KERNEL #define MINALLOCSIZE (1 << MINBUCKET) #define BUCKETINDX(size) \ ((size) <= (MINALLOCSIZE * 128) \ @@ -149,6 +159,10 @@ struct kmembuckets { ? (MINBUCKET + 14) \ : (MINBUCKET + 15)) +#endif + +#ifdef _KERNEL + /* * Turn virtual addresses into kmem map indices */ @@ -188,6 +202,7 @@ void *realloc (void *addr, unsigned long size, struct malloc_type *type, int flags); void *reallocf (void *addr, unsigned long size, struct malloc_type *type, int flags); + #endif /* _KERNEL */ #endif /* !_SYS_MALLOC_H_ */ diff --git a/sys/sys/slaballoc.h b/sys/sys/slaballoc.h new file mode 100644 index 0000000000..e1a5dd5b70 --- /dev/null +++ b/sys/sys/slaballoc.h @@ -0,0 +1,98 @@ +/* + * KERN_SLABALLOC.H - Kernel SLAB memory allocator + * + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/sys/slaballoc.h,v 1.1 2003/08/27 01:43:07 dillon Exp $ + */ + +#ifndef _SYS_SLABALLOC_H_ +#define _SYS_SLABALLOC_H_ + +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) + +#ifndef _SYS_MALLOC_H_ +#include +#endif + +/* + * Note that any allocations which are exact multiples of PAGE_SIZE, or + * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 
+ */ +#define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ +#define ZALLOC_MIN_ZONE_SIZE (32 * 1024) /* minimum zone size */ +#define ZALLOC_MAX_ZONE_SIZE (128 * 1024) /* maximum zone size */ +#define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ + +#if ZALLOC_ZONE_LIMIT == 16384 +#define NZONES 72 +#elif ZALLOC_ZONE_LIMIT == 32768 +#define NZONES 80 +#else +#error "I couldn't figure out NZONES" +#endif + +/* + * Chunk structure for free elements + */ +typedef struct SLChunk { + struct SLChunk *c_Next; +} SLChunk; + +/* + * The IN-BAND zone header is placed at the beginning of each zone. + */ +typedef struct SLZone { + int32_t z_Magic; /* magic number for sanity check */ + int z_Cpu; /* which cpu owns this zone? */ + int z_NFree; /* total free chunks / ualloc space in zone */ + struct SLZone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ + int z_NMax; /* maximum free chunks */ + int z_UAlloc; /* allocation offset into uninitialized space */ + int z_ChunkSize; /* chunk size for validation */ + int z_FirstFreePg; /* chunk list on a page-by-page basis */ + int z_ZoneIndex; + SLChunk *z_PageAry[ZALLOC_MAX_ZONE_SIZE / PAGE_SIZE]; +} SLZone; + +typedef struct SLGlobalData { + SLZone *ZoneAry[NZONES]; /* linked list of zones NFree > 0 */ + SLZone *FreeZones; /* whole zones that have become free */ + int NFreeZones; /* free zone count */ + struct malloc_type ZoneInfo; /* stats on meta-zones allocated */ +} SLGlobalData; + +#endif + +#ifdef _KERNEL + +void slab_init(void); +void *slab_alloc(struct malloc_type *info, uintptr_t bytes, int flags); +void slab_free(void *ptr, struct malloc_type *info); + +#endif /* _KERNEL */ + +#endif + diff --git a/sys/vfs/procfs/procfs_mem.c b/sys/vfs/procfs/procfs_mem.c index 41e0e1f40c..2090bdc25d 100644 --- a/sys/vfs/procfs/procfs_mem.c +++ b/sys/vfs/procfs/procfs_mem.c @@ -38,7 +38,7 @@ * @(#)procfs_mem.c 8.5 (Berkeley) 6/15/94 * * $FreeBSD: src/sys/miscfs/procfs/procfs_mem.c,v 1.46.2.3 2002/01/22 17:22:59 nectar Exp $ - * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.5 2003/08/20 09:56:33 rob Exp $ + * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.6 2003/08/27 01:43:07 dillon Exp $ */ /* @@ -183,7 +183,7 @@ procfs_rwmem(curp, p, uio) */ object = NULL; - vm_map_lookup_done(tmap, out_entry); + vm_map_lookup_done(tmap, out_entry, 0); break; } @@ -199,7 +199,7 @@ procfs_rwmem(curp, p, uio) * it. 
*/ vm_object_reference(object); - vm_map_lookup_done(tmap, out_entry); + vm_map_lookup_done(tmap, out_entry, 0); pmap_kenter(kva, VM_PAGE_TO_PHYS(m)); diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 3d9d19a72e..f1078663e2 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -32,7 +32,7 @@ * * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 * $FreeBSD: src/sys/vm/vm_extern.h,v 1.46.2.3 2003/01/13 22:51:17 dillon Exp $ - * $DragonFly: src/sys/vm/vm_extern.h,v 1.4 2003/08/20 08:03:01 rob Exp $ + * $DragonFly: src/sys/vm/vm_extern.h,v 1.5 2003/08/27 01:43:08 dillon Exp $ */ #ifndef _VM_EXTERN_H_ @@ -70,6 +70,7 @@ vm_offset_t kmem_alloc_wait (vm_map_t, vm_size_t); void kmem_free (vm_map_t, vm_offset_t, vm_size_t); void kmem_free_wakeup (vm_map_t, vm_offset_t, vm_size_t); void kmem_init (vm_offset_t, vm_offset_t); +void kmem_cpu_init (void); vm_offset_t kmem_malloc (vm_map_t, vm_size_t, boolean_t); vm_map_t kmem_suballoc (vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t); void munmapfd (struct proc *, int); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 9ae4c3a027..8221840595 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -67,7 +67,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $ - * $DragonFly: src/sys/vm/vm_fault.c,v 1.6 2003/08/20 08:03:01 rob Exp $ + * $DragonFly: src/sys/vm/vm_fault.c,v 1.7 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -127,7 +127,7 @@ static __inline void unlock_map(struct faultstate *fs) { if (fs->lookup_still_valid) { - vm_map_lookup_done(fs->map, fs->entry); + vm_map_lookup_done(fs->map, fs->entry, 0); fs->lookup_still_valid = FALSE; } } diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 21877864ce..e8bc206cdf 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_init.c,v 1.20 1999/10/29 18:09:29 phk Exp $ - * $DragonFly: src/sys/vm/vm_init.c,v 1.3 2003/08/20 08:03:01 rob Exp $ + * $DragonFly: src/sys/vm/vm_init.c,v 1.4 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -113,6 +113,7 @@ vm_mem_init(dummy) vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); + kmem_cpu_init(); pmap_init(avail_start, avail_end); vm_pager_init(); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 507e380ea9..4b99c1f815 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_kern.c,v 1.61.2.2 2002/03/12 18:25:26 tegge Exp $ - * $DragonFly: src/sys/vm/vm_kern.c,v 1.7 2003/08/25 19:50:33 dillon Exp $ + * $DragonFly: src/sys/vm/vm_kern.c,v 1.8 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -148,22 +148,21 @@ kmem_alloc_nofault(map, size) * or a submap. */ vm_offset_t -kmem_alloc(map, size) - vm_map_t map; - vm_size_t size; +kmem_alloc(vm_map_t map, vm_size_t size) { vm_offset_t addr; vm_offset_t offset; vm_offset_t i; + int count; size = round_page(size); + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); + /* * Use the kernel object for wired-down kernel pages. Assume that no * region of the kernel object is referenced more than once. - */ - - /* + * * Locate sufficient space in the map. This will give us the final * virtual address for the new memory, and thus will tell us the * offset within the kernel map. 
@@ -171,13 +170,16 @@ kmem_alloc(map, size) vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr)) { vm_map_unlock(map); + vm_map_entry_krelease(count); return (0); } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kernel_object); - vm_map_insert(map, kernel_object, offset, addr, addr + size, + vm_map_insert(map, &count, + kernel_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); + vm_map_entry_krelease(count); /* * Guarantee that there are pages already in this object before @@ -299,15 +301,13 @@ kmem_suballoc(parent, min, max, size) * I have not verified that it actually does not block. */ vm_offset_t -kmem_malloc(map, size, flags) - vm_map_t map; - vm_size_t size; - int flags; +kmem_malloc(vm_map_t map, vm_size_t size, int flags) { vm_offset_t offset, i; vm_map_entry_t entry; vm_offset_t addr; vm_page_t m; + int count; #if defined(NO_KMEM_MAP) if (map != kernel_map && map != mb_map) @@ -326,8 +326,10 @@ kmem_malloc(map, size, flags) * offset within the kernel map. */ vm_map_lock(map); + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr)) { vm_map_unlock(map); + vm_map_entry_krelease(count); if (map == mb_map) { mb_map_full = TRUE; printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n"); @@ -345,7 +347,8 @@ kmem_malloc(map, size, flags) } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kmem_object); - vm_map_insert(map, kmem_object, offset, addr, addr + size, + vm_map_insert(map, &count, + kmem_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); for (i = 0; i < size; i += PAGE_SIZE) { @@ -387,8 +390,9 @@ retry: OFF_TO_IDX(offset + i)); vm_page_free(m); } - vm_map_delete(map, addr, addr + size); + vm_map_delete(map, addr, addr + size, &count); vm_map_unlock(map); + vm_map_entry_krelease(count); return (0); } vm_page_flag_clear(m, PG_ZERO); @@ -407,7 +411,7 @@ retry: panic("kmem_malloc: entry not found or misaligned"); entry->wired_count = 1; - vm_map_simplify_entry(map, entry); + vm_map_simplify_entry(map, entry, &count); /* * Loop thru pages, entering them in the pmap. 
(We cannot add them to @@ -425,6 +429,7 @@ retry: vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED); } vm_map_unlock(map); + vm_map_entry_krelease(count); return (addr); } @@ -439,14 +444,15 @@ retry: */ vm_offset_t -kmem_alloc_wait(map, size) - vm_map_t map; - vm_size_t size; +kmem_alloc_wait(vm_map_t map, vm_size_t size) { vm_offset_t addr; + int count; size = round_page(size); + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); + for (;;) { /* * To make this work for more than one map, use the map's lock @@ -457,14 +463,18 @@ kmem_alloc_wait(map, size) break; /* no space now; see if we can ever get space */ if (vm_map_max(map) - vm_map_min(map) < size) { + vm_map_entry_krelease(count); vm_map_unlock(map); return (0); } vm_map_unlock(map); tsleep(map, 0, "kmaw", 0); } - vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); + vm_map_insert(map, &count, + NULL, (vm_offset_t) 0, + addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); + vm_map_entry_krelease(count); return (addr); } @@ -480,10 +490,14 @@ kmem_free_wakeup(map, addr, size) vm_offset_t addr; vm_size_t size; { + int count; + + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); vm_map_lock(map); - (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); + (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size), &count); wakeup(map); vm_map_unlock(map); + vm_map_entry_krelease(count); } /* @@ -493,22 +507,39 @@ kmem_free_wakeup(map, addr, size) * data, bss, and all space allocated thus far (`boostrap' data). The * new map will thus map the range between VM_MIN_KERNEL_ADDRESS and * `start' as allocated, and the range between `start' and `end' as free. + * + * Depend on the zalloc bootstrap cache to get our vm_map_entry_t. */ - void -kmem_init(start, end) - vm_offset_t start, end; +kmem_init(vm_offset_t start, vm_offset_t end) { vm_map_t m; + int count; m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; kernel_map->system_map = 1; - (void) vm_map_insert(m, NULL, (vm_offset_t) 0, + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); + (void) vm_map_insert(m, &count, NULL, (vm_offset_t) 0, VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); + vm_map_entry_release(count); +} + +/* + * kmem_cpu_init: + * + * Load up extra vm_map_entry structures in each cpu's globaldata + * cache. These allow us to expand the mapent zone for kernel_map. + * Without them we would get into a recursion deadlock trying to + * reserve map entries (reserve->zalloc->kmem_alloc->reserve->...) + */ +void +kmem_cpu_init(void) +{ + vm_map_entry_reserve(MAP_RESERVE_COUNT * 2); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index de0702c493..e3cd30043d 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ - * $DragonFly: src/sys/vm/vm_map.c,v 1.10 2003/08/25 19:50:33 dillon Exp $ + * $DragonFly: src/sys/vm/vm_map.c,v 1.11 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -91,6 +91,8 @@ #include #include +#include + /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, @@ -131,24 +133,23 @@ * maps and requires map entries. 
*/ -static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store; -static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone; -static struct vm_object kmapentobj, mapentobj, mapobj; +static struct vm_zone mapentzone_store, mapzone_store; +static vm_zone_t mapentzone, mapzone, vmspace_zone; +static struct vm_object mapentobj, mapobj; static struct vm_map_entry map_entry_init[MAX_MAPENT]; -static struct vm_map_entry kmap_entry_init[MAX_KMAPENT]; static struct vm_map map_init[MAX_KMAP]; -static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t); -static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t); -static vm_map_entry_t vm_map_entry_create (vm_map_t); -static void vm_map_entry_delete (vm_map_t, vm_map_entry_t); -static void vm_map_entry_dispose (vm_map_t, vm_map_entry_t); +static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *); +static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *); +static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *); +static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *); +static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *); static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t); static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t); static void vm_map_split (vm_map_entry_t); -static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int flags); +static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int *count, int flags); void vm_map_startup() @@ -156,9 +157,6 @@ vm_map_startup() mapzone = &mapzone_store; zbootinit(mapzone, "MAP", sizeof (struct vm_map), map_init, MAX_KMAP); - kmapentzone = &kmapentzone_store; - zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry), - kmap_entry_init, MAX_KMAPENT); mapentzone = &mapentzone_store; zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), map_entry_init, MAX_MAPENT); @@ -186,14 +184,10 @@ vmspace_alloc(min, max) } void -vm_init2(void) { - zinitna(kmapentzone, &kmapentobj, - NULL, 0, lmin((VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE, - vmstats.v_page_count) / 8, ZONE_INTERRUPT, 1); - zinitna(mapentzone, &mapentobj, - NULL, 0, 0, 0, 1); - zinitna(mapzone, &mapobj, - NULL, 0, 0, 0, 1); +vm_init2(void) +{ + zinitna(mapentzone, &mapentobj, NULL, 0, 0, ZONE_USE_RESERVE, 1); + zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1); vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); pmap_init2(); vm_object_init2(); @@ -202,6 +196,8 @@ vm_init2(void) { static __inline void vmspace_dofree(struct vmspace *vm) { + int count; + /* * Make sure any SysV shm is freed, it might not have in * exit1() @@ -213,10 +209,12 @@ vmspace_dofree(struct vmspace *vm) * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. */ + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(&vm->vm_map); - (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, - vm->vm_map.max_offset); + vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, + vm->vm_map.max_offset, &count); vm_map_unlock(&vm->vm_map); + vm_map_entry_release(count); pmap_release(vmspace_pmap(vm)); zfree(vmspace_zone, vm); @@ -298,9 +296,7 @@ vmspace_swap_count(struct vmspace *vmspace) * the given lower and upper address bounds. 
*/ vm_map_t -vm_map_create(pmap, min, max) - pmap_t pmap; - vm_offset_t min, max; +vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) { vm_map_t result; @@ -316,9 +312,7 @@ vm_map_create(pmap, min, max) * The pmap is set elsewhere. */ void -vm_map_init(map, min, max) - struct vm_map *map; - vm_offset_t min, max; +vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max) { map->header.next = map->header.prev = &map->header; map->nentries = 0; @@ -333,25 +327,130 @@ vm_map_init(map, min, max) lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE); } +/* + * vm_map_entry_reserve: + * + * Reserves vm_map_entry structures outside of the critical path + */ +int +vm_map_entry_reserve(int count) +{ + struct globaldata *gd = mycpu; + vm_map_entry_t entry; + + crit_enter(); + gd->gd_vme_avail -= count; + + /* + * Make sure we have enough structures in gd_vme_base to handle + * the reservation request. + */ + while (gd->gd_vme_avail < 0) { + entry = zalloc(mapentzone); + entry->next = gd->gd_vme_base; + gd->gd_vme_base = entry; + ++gd->gd_vme_avail; + } + crit_exit(); + return(count); +} + +/* + * vm_map_entry_release: + * + * Releases previously reserved vm_map_entry structures that were not + * used. If we have too much junk in our per-cpu cache, clean some of + * it out. + */ +void +vm_map_entry_release(int count) +{ + struct globaldata *gd = mycpu; + vm_map_entry_t entry; + + crit_enter(); + gd->gd_vme_avail += count; + while (gd->gd_vme_avail > MAP_RESERVE_SLOP) { + entry = gd->gd_vme_base; + KKASSERT(entry != NULL); + gd->gd_vme_base = entry->next; + --gd->gd_vme_avail; + crit_exit(); + zfree(mapentzone, entry); + crit_enter(); + } + crit_exit(); +} + +/* + * vm_map_entry_kreserve: + * + * Reserve map entry structures for use in kernel_map or (if it exists) + * kmem_map. These entries have *ALREADY* been reserved on a per-cpu + * basis. + * + * XXX if multiple kernel map entries are used without any intervening + * use by another map the KKASSERT() may assert. + */ +int +vm_map_entry_kreserve(int count) +{ + struct globaldata *gd = mycpu; + + crit_enter(); + gd->gd_vme_kdeficit += count; + crit_exit(); + KKASSERT(gd->gd_vme_base != NULL); + return(count); +} + +/* + * vm_map_entry_krelease: + * + * Release previously reserved map entries for kernel_map or kmem_map + * use. This routine determines how many entries were actually used and + * replenishes the kernel reserve supply from vme_avail. + * + * If there is insufficient supply vme_avail will go negative, which is + * ok. We cannot safely call zalloc in this function without getting + * into a recursion deadlock. zalloc() will call vm_map_entry_reserve() + * to regenerate the lost entries. + */ +void +vm_map_entry_krelease(int count) +{ + struct globaldata *gd = mycpu; + + crit_enter(); + gd->gd_vme_kdeficit -= count; + gd->gd_vme_avail -= gd->gd_vme_kdeficit; /* can go negative */ + gd->gd_vme_kdeficit = 0; + crit_exit(); +} + /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. No entry fields are filled - * in. this ruotine may be called from an interrupt. + * in. + * + * This routine may be called from an interrupt thread but not a FAST + * interrupt. This routine may recurse the map lock.
*/ static vm_map_entry_t -vm_map_entry_create(map) - vm_map_t map; +vm_map_entry_create(vm_map_t map, int *countp) { - vm_map_entry_t new_entry; + struct globaldata *gd = mycpu; + vm_map_entry_t entry; - if (map->system_map || !mapentzone) - new_entry = zalloc(kmapentzone); - else - new_entry = zalloc(mapentzone); - if (new_entry == NULL) - panic("vm_map_entry_create: kernel resources exhausted"); - return(new_entry); + KKASSERT(*countp > 0); + --*countp; + crit_enter(); + entry = gd->gd_vme_base; + KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp)); + gd->gd_vme_base = entry->next; + crit_exit(); + return(entry); } /* @@ -361,14 +460,15 @@ vm_map_entry_create(map) * function may be called from an interrupt. */ static void -vm_map_entry_dispose(map, entry) - vm_map_t map; - vm_map_entry_t entry; +vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp) { - if (map->system_map || !mapentzone) - zfree(kmapentzone, entry); - else - zfree(mapentzone, entry); + struct globaldata *gd = mycpu; + + ++*countp; + crit_enter(); + entry->next = gd->gd_vme_base; + gd->gd_vme_base = entry; + crit_exit(); } @@ -497,13 +597,16 @@ vm_map_lookup_entry(map, address, entry) * map at the specified address range. The object's * size should match that of the address range. * - * Requires that the map be locked, and leaves it so. + * Requires that the map be locked, and leaves it so. Requires that + * sufficient vm_map_entry structures have been reserved and tracks + * the use via countp. * * If object is non-NULL, ref count must be bumped by caller * prior to making call to account for the new entry. */ int -vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, +vm_map_insert(vm_map_t map, int *countp, + vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) { @@ -583,7 +686,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, (prev_entry->max_protection == max)) { map->size += (end - prev_entry->end); prev_entry->end = end; - vm_map_simplify_entry(map, prev_entry); + vm_map_simplify_entry(map, prev_entry, countp); return (KERN_SUCCESS); } @@ -609,7 +712,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, * Create a new entry */ - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, countp); new_entry->start = start; new_entry->end = end; @@ -647,7 +750,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, /* * It may be possible to simplify the entry */ - vm_map_simplify_entry(map, new_entry); + vm_map_simplify_entry(map, new_entry, countp); #endif if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { @@ -697,6 +800,7 @@ vm_map_findspace( else align_mask = align - 1; +retry: /* * Look for the first possible address; if there's already something * at this address, we have to start after it. 
@@ -743,13 +847,14 @@ vm_map_findspace( break; } SAVE_HINT(map, entry); - *addr = start; if (map == kernel_map) { vm_offset_t ksize; if ((ksize = round_page(start + length)) > kernel_vm_end) { pmap_growkernel(ksize); + goto retry; } } + *addr = start; return (0); } @@ -770,6 +875,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, { vm_offset_t start; int result; + int count; #if !defined(NO_KMEM_MAP) int s = 0; #endif @@ -781,10 +887,12 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, s = splvm(); #endif + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); if (find_space) { if (vm_map_findspace(map, start, length, 1, addr)) { vm_map_unlock(map); + vm_map_entry_release(count); #if !defined(NO_KMEM_MAP) if (map == kmem_map || map == mb_map) splx(s); @@ -793,9 +901,10 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } start = *addr; } - result = vm_map_insert(map, object, offset, + result = vm_map_insert(map, &count, object, offset, start, start + length, prot, max, cow); vm_map_unlock(map); + vm_map_entry_release(count); #if !defined(NO_KMEM_MAP) if (map == kmem_map || map == mb_map) @@ -819,9 +928,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, * in-transition flag set. */ void -vm_map_simplify_entry(map, entry) - vm_map_t map; - vm_map_entry_t entry; +vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp) { vm_map_entry_t next, prev; vm_size_t prevsize, esize; @@ -852,7 +959,7 @@ vm_map_simplify_entry(map, entry) entry->offset = prev->offset; if (prev->object.vm_object) vm_object_deallocate(prev->object.vm_object); - vm_map_entry_dispose(map, prev); + vm_map_entry_dispose(map, prev, countp); } } @@ -876,7 +983,7 @@ vm_map_simplify_entry(map, entry) entry->end = next->end; if (next->object.vm_object) vm_object_deallocate(next->object.vm_object); - vm_map_entry_dispose(map, next); + vm_map_entry_dispose(map, next, countp); } } } @@ -887,10 +994,10 @@ vm_map_simplify_entry(map, entry) * the specified address; if necessary, * it splits the entry into two. */ -#define vm_map_clip_start(map, entry, startaddr) \ +#define vm_map_clip_start(map, entry, startaddr, countp) \ { \ if (startaddr > entry->start) \ - _vm_map_clip_start(map, entry, startaddr); \ + _vm_map_clip_start(map, entry, startaddr, countp); \ } /* @@ -898,10 +1005,7 @@ vm_map_simplify_entry(map, entry) * the entry must be split. */ static void -_vm_map_clip_start(map, entry, start) - vm_map_t map; - vm_map_entry_t entry; - vm_offset_t start; +_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start, int *countp) { vm_map_entry_t new_entry; @@ -911,7 +1015,7 @@ _vm_map_clip_start(map, entry, start) * starting address. */ - vm_map_simplify_entry(map, entry); + vm_map_simplify_entry(map, entry, countp); /* * If there is no object backing this entry, we might as well create @@ -929,7 +1033,7 @@ _vm_map_clip_start(map, entry, start) entry->offset = 0; } - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, countp); *new_entry = *entry; new_entry->end = start; @@ -951,10 +1055,10 @@ _vm_map_clip_start(map, entry, start) * it splits the entry into two. */ -#define vm_map_clip_end(map, entry, endaddr) \ +#define vm_map_clip_end(map, entry, endaddr, countp) \ { \ if (endaddr < entry->end) \ - _vm_map_clip_end(map, entry, endaddr); \ + _vm_map_clip_end(map, entry, endaddr, countp); \ } /* @@ -962,10 +1066,7 @@ _vm_map_clip_start(map, entry, start) * the entry must be split. 
*/ static void -_vm_map_clip_end(map, entry, end) - vm_map_t map; - vm_map_entry_t entry; - vm_offset_t end; +_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, int *countp) { vm_map_entry_t new_entry; @@ -989,7 +1090,7 @@ _vm_map_clip_end(map, entry, end) * Create a new entry and insert it AFTER the specified entry */ - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, countp); *new_entry = *entry; new_entry->start = entry->end = end; @@ -1083,7 +1184,8 @@ vm_map_transition_wait(vm_map_t map) */ static vm_map_entry_t -vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) +vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, + int *countp, int flags) { vm_map_entry_t start_entry; vm_map_entry_t entry; @@ -1112,8 +1214,8 @@ again: * Since we hold an exclusive map lock we do not have to restart * after clipping, even though clipping may block in zalloc. */ - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + vm_map_clip_start(map, entry, start, countp); + vm_map_clip_end(map, entry, end, countp); entry->eflags |= MAP_ENTRY_IN_TRANSITION; /* @@ -1129,7 +1231,7 @@ again: if (flags & MAP_CLIP_NO_HOLES) { if (next->start > entry->end) { vm_map_unclip_range(map, start_entry, - start, entry->end, flags); + start, entry->end, countp, flags); return(NULL); } } @@ -1152,14 +1254,14 @@ again: * No restart necessary even though clip_end may block, we * are holding the map lock. */ - vm_map_clip_end(map, next, end); + vm_map_clip_end(map, next, end, countp); next->eflags |= MAP_ENTRY_IN_TRANSITION; entry = next; } if (flags & MAP_CLIP_NO_HOLES) { if (entry->end != end) { vm_map_unclip_range(map, start_entry, - start, entry->end, flags); + start, entry->end, countp, flags); return(NULL); } } @@ -1192,6 +1294,7 @@ vm_map_unclip_range( vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, + int *countp, int flags) { vm_map_entry_t entry; @@ -1215,7 +1318,7 @@ vm_map_unclip_range( */ entry = start_entry; while (entry != &map->header && entry->start < end) { - vm_map_simplify_entry(map, entry); + vm_map_simplify_entry(map, entry, countp); entry = entry->next; } } @@ -1239,26 +1342,24 @@ vm_map_unclip_range( * submap (if desired). [Better yet, don't try it.] 
*/ int -vm_map_submap(map, start, end, submap) - vm_map_t map; - vm_offset_t start; - vm_offset_t end; - vm_map_t submap; +vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap) { vm_map_entry_t entry; int result = KERN_INVALID_ARGUMENT; + int count; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, start, &count); } else { entry = entry->next; } - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, entry, end, &count); if ((entry->start == start) && (entry->end == end) && ((entry->eflags & MAP_ENTRY_COW) == 0) && @@ -1268,6 +1369,7 @@ vm_map_submap(map, start, end, submap) result = KERN_SUCCESS; } vm_map_unlock(map); + vm_map_entry_release(count); return (result); } @@ -1286,13 +1388,15 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, { vm_map_entry_t current; vm_map_entry_t entry; + int count; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, start, &count); } else { entry = entry->next; } @@ -1305,10 +1409,12 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, while ((current != &map->header) && (current->start < end)) { if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_INVALID_ARGUMENT); } if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_PROTECTION_FAILURE); } current = current->next; @@ -1318,13 +1424,12 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, * Go back and fix up protections. [Note that clipping is not * necessary the second time.] */ - current = entry; while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; - vm_map_clip_end(map, current, end); + vm_map_clip_end(map, current, end, &count); old_prot = current->protection; if (set_max) @@ -1349,12 +1454,13 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, #undef MASK } - vm_map_simplify_entry(map, current); + vm_map_simplify_entry(map, current, &count); current = current->next; } vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_SUCCESS); } @@ -1368,13 +1474,11 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, */ int -vm_map_madvise(map, start, end, behav) - vm_map_t map; - vm_offset_t start, end; - int behav; +vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav) { vm_map_entry_t current, entry; int modify_map = 0; + int count; /* * Some madvise calls directly modify the vm_map_entry, in which case @@ -1383,6 +1487,8 @@ vm_map_madvise(map, start, end, behav) * on the map. 
*/ + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); + switch(behav) { case MADV_NORMAL: case MADV_SEQUENTIAL: @@ -1400,6 +1506,7 @@ vm_map_madvise(map, start, end, behav) vm_map_lock_read(map); break; default: + vm_map_entry_release(count); return (KERN_INVALID_ARGUMENT); } @@ -1411,7 +1518,7 @@ vm_map_madvise(map, start, end, behav) if (vm_map_lookup_entry(map, start, &entry)) { if (modify_map) - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, start, &count); } else { entry = entry->next; } @@ -1430,7 +1537,7 @@ vm_map_madvise(map, start, end, behav) if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; - vm_map_clip_end(map, current, end); + vm_map_clip_end(map, current, end, &count); switch (behav) { case MADV_NORMAL: @@ -1457,7 +1564,7 @@ vm_map_madvise(map, start, end, behav) default: break; } - vm_map_simplify_entry(map, current); + vm_map_simplify_entry(map, current, &count); } vm_map_unlock(map); } else { @@ -1510,6 +1617,7 @@ vm_map_madvise(map, start, end, behav) } vm_map_unlock_read(map); } + vm_map_entry_release(count); return(0); } @@ -1528,6 +1636,7 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, { vm_map_entry_t entry; vm_map_entry_t temp_entry; + int count; switch (new_inheritance) { case VM_INHERIT_NONE: @@ -1538,27 +1647,28 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, return (KERN_INVALID_ARGUMENT); } + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, start, &count); } else entry = temp_entry->next; while ((entry != &map->header) && (entry->start < end)) { - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, entry, end, &count); entry->inheritance = new_inheritance; - vm_map_simplify_entry(map, entry); + vm_map_simplify_entry(map, entry, &count); entry = entry->next; } - vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_SUCCESS); } @@ -1576,14 +1686,17 @@ vm_map_user_pageable(map, start, real_end, new_pageable) vm_map_entry_t start_entry; vm_offset_t end; int rv = KERN_SUCCESS; + int count; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, real_end); end = real_end; - start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); + start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES); if (start_entry == NULL) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_INVALID_ADDRESS); } @@ -1718,10 +1831,11 @@ vm_map_user_pageable(map, start, real_end, new_pageable) } } done: - vm_map_unclip_range(map, start_entry, start, real_end, + vm_map_unclip_range(map, start_entry, start, real_end, &count, MAP_CLIP_NO_HOLES); map->timestamp++; vm_map_unlock(map); + vm_map_entry_release(count); return (rv); } @@ -1735,28 +1849,38 @@ done: * * The map must not be locked, but a reference * must remain to the map throughout the call. + * + * This function may be called via the zalloc path and must properly + * reserve map entries for kernel_map. 
*/ int -vm_map_pageable(map, start, real_end, new_pageable) - vm_map_t map; - vm_offset_t start; - vm_offset_t real_end; - boolean_t new_pageable; +vm_map_pageable(vm_map_t map, vm_offset_t start, + vm_offset_t real_end, boolean_t new_pageable) { vm_map_entry_t entry; vm_map_entry_t start_entry; vm_offset_t end; int rv = KERN_SUCCESS; + int count; int s; + if (map == kernel_map) + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); +#if !defined(NO_KMEM_MAP) + else if (map == kmem_map) + count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); +#endif + else + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, real_end); end = real_end; - start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); + start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES); if (start_entry == NULL) { vm_map_unlock(map); - return (KERN_INVALID_ADDRESS); + rv = KERN_INVALID_ADDRESS; + goto failure; } if (new_pageable == 0) { /* @@ -1931,13 +2055,44 @@ vm_map_pageable(map, start, real_end, new_pageable) } } done: - vm_map_unclip_range(map, start_entry, start, real_end, + vm_map_unclip_range(map, start_entry, start, real_end, &count, MAP_CLIP_NO_HOLES); map->timestamp++; vm_map_unlock(map); +failure: + if (map == kernel_map) + vm_map_entry_krelease(count); +#if !defined(NO_KMEM_MAP) + else if (map == kmem_map) + vm_map_entry_krelease(count); +#endif + else + vm_map_entry_release(count); return (rv); } +/* + * vm_map_set_wired_quick() + * + * Mark a newly allocated address range as wired but do not fault in + * the pages. The caller is expected to load the pages into the object. + * + * The map must be locked on entry and will remain locked on return. + */ +void +vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp) +{ + vm_map_entry_t scan; + vm_map_entry_t entry; + + entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES); + for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) { + KKASSERT(entry->wired_count == 0); + entry->wired_count = 1; + } + vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES); +} + /* * vm_map_clean * @@ -2078,9 +2233,7 @@ vm_map_clean(map, start, end, syncio, invalidate) * [This is the reason for this routine's existence.] */ static void -vm_map_entry_unwire(map, entry) - vm_map_t map; - vm_map_entry_t entry; +vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) { vm_fault_unwire(map, entry->start, entry->end); entry->wired_count = 0; @@ -2092,9 +2245,7 @@ vm_map_entry_unwire(map, entry) * Deallocate the given entry from the target map. */ static void -vm_map_entry_delete(map, entry) - vm_map_t map; - vm_map_entry_t entry; +vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp) { vm_map_entry_unlink(map, entry); map->size -= entry->end - entry->start; @@ -2103,7 +2254,7 @@ vm_map_entry_delete(map, entry) vm_object_deallocate(entry->object.vm_object); } - vm_map_entry_dispose(map, entry); + vm_map_entry_dispose(map, entry, countp); } /* @@ -2113,10 +2264,7 @@ vm_map_entry_delete(map, entry) * map. 
*/ int -vm_map_delete(map, start, end) - vm_map_t map; - vm_offset_t start; - vm_offset_t end; +vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp) { vm_object_t object; vm_map_entry_t entry; @@ -2131,7 +2279,7 @@ again: entry = first_entry->next; else { entry = first_entry; - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, start, countp); /* * Fix the lookup hint now, rather than each time though the * loop. @@ -2174,7 +2322,7 @@ again: vm_map_transition_wait(map); goto again; } - vm_map_clip_end(map, entry, end); + vm_map_clip_end(map, entry, end, countp); s = entry->start; e = entry->end; @@ -2220,7 +2368,7 @@ again: * (Otherwise, its page frames may be reallocated, and any * modify bits will be set in the wrong object!) */ - vm_map_entry_delete(map, entry); + vm_map_entry_delete(map, entry, countp); entry = next; } return (KERN_SUCCESS); @@ -2233,12 +2381,10 @@ again: * This is the exported form of vm_map_delete. */ int -vm_map_remove(map, start, end) - vm_map_t map; - vm_offset_t start; - vm_offset_t end; +vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) { int result; + int count; #if !defined(NO_KMEM_MAP) int s = 0; #endif @@ -2247,11 +2393,12 @@ vm_map_remove(map, start, end) if (map == kmem_map || map == mb_map) s = splvm(); #endif - + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - result = vm_map_delete(map, start, end); + result = vm_map_delete(map, start, end, &count); vm_map_unlock(map); + vm_map_entry_release(count); #if !defined(NO_KMEM_MAP) if (map == kmem_map || map == mb_map) @@ -2312,8 +2459,7 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, * being a negative impact on memory usage. */ static void -vm_map_split(entry) - vm_map_entry_t entry; +vm_map_split(vm_map_entry_t entry) { vm_page_t m; vm_object_t orig_object, new_object, source; @@ -2408,9 +2554,8 @@ vm_map_split(entry) * entry. The entries *must* be aligned properly. */ static void -vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) - vm_map_t src_map, dst_map; - vm_map_entry_t src_entry, dst_entry; +vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, + vm_map_entry_t src_entry, vm_map_entry_t dst_entry) { vm_object_t src_object; @@ -2478,8 +2623,7 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) * The source map must not be locked. */ struct vmspace * -vmspace_fork(vm1) - struct vmspace *vm1; +vmspace_fork(struct vmspace *vm1) { struct vmspace *vm2; vm_map_t old_map = &vm1->vm_map; @@ -2487,6 +2631,7 @@ vmspace_fork(vm1) vm_map_entry_t old_entry; vm_map_entry_t new_entry; vm_object_t object; + int count; vm_map_lock(old_map); old_map->infork = 1; @@ -2497,8 +2642,16 @@ vmspace_fork(vm1) new_map = &vm2->vm_map; /* XXX */ new_map->timestamp = 1; + count = 0; old_entry = old_map->header.next; + while (old_entry != &old_map->header) { + ++count; + old_entry = old_entry->next; + } + count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT); + + old_entry = old_map->header.next; while (old_entry != &old_map->header) { if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) panic("vm_map_fork: encountered a submap"); @@ -2540,7 +2693,7 @@ vmspace_fork(vm1) /* * Clone the entry, referencing the shared object. 
*/ - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, &count); *new_entry = *old_entry; new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; new_entry->wired_count = 0; @@ -2567,7 +2720,7 @@ vmspace_fork(vm1) /* * Clone the entry and link into the map. */ - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, &count); *new_entry = *old_entry; new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; new_entry->wired_count = 0; @@ -2584,6 +2737,7 @@ vmspace_fork(vm1) new_map->size = old_map->size; old_map->infork = 0; vm_map_unlock(old_map); + vm_map_entry_release(count); return (vm2); } @@ -2596,6 +2750,7 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_map_entry_t new_stack_entry; vm_size_t init_ssize; int rv; + int count; if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS) return (KERN_NO_SPACE); @@ -2605,11 +2760,13 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, else init_ssize = sgrowsiz; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); vm_map_lock(map); /* If addr is already mapped, no go */ if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_NO_SPACE); } @@ -2617,6 +2774,7 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, if (map->size + init_ssize > curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_NO_SPACE); } @@ -2631,6 +2789,7 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, if ((prev_entry->next != &map->header) && (prev_entry->next->start < addrbos + max_ssize)) { vm_map_unlock(map); + vm_map_entry_release(count); return (KERN_NO_SPACE); } @@ -2643,13 +2802,14 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, * eliminate these as input parameters, and just * pass these values here in the insert call. */ - rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize, + rv = vm_map_insert(map, &count, + NULL, 0, addrbos + max_ssize - init_ssize, addrbos + max_ssize, prot, max, cow); /* Now set the avail_ssize amount */ if (rv == KERN_SUCCESS){ if (prev_entry != &map->header) - vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize); + vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count); new_stack_entry = prev_entry->next; if (new_stack_entry->end != addrbos + max_ssize || new_stack_entry->start != addrbos + max_ssize - init_ssize) @@ -2659,6 +2819,7 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, } vm_map_unlock(map); + vm_map_entry_release(count); return (rv); } @@ -2677,11 +2838,13 @@ vm_map_growstack (struct proc *p, vm_offset_t addr) struct vmspace *vm = p->p_vmspace; vm_map_t map = &vm->vm_map; vm_offset_t end; - int grow_amount; - int rv = KERN_SUCCESS; - int is_procstack; - int use_read_lock = 1; + int grow_amount; + int rv = KERN_SUCCESS; + int is_procstack; + int use_read_lock = 1; + int count; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); Retry: if (use_read_lock) vm_map_lock_read(map); @@ -2785,7 +2948,8 @@ Retry: addr = end; } - rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start, + rv = vm_map_insert(map, &count, + NULL, 0, addr, stack_entry->start, VM_PROT_ALL, VM_PROT_ALL, 0); @@ -2793,7 +2957,7 @@ Retry: /* Adjust the available stack space by the amount we grew. 
*/ if (rv == KERN_SUCCESS) { if (prev_entry != &map->header) - vm_map_clip_end(map, prev_entry, addr); + vm_map_clip_end(map, prev_entry, addr, &count); new_stack_entry = prev_entry->next; if (new_stack_entry->end != stack_entry->start || new_stack_entry->start != addr) @@ -2813,6 +2977,7 @@ done: vm_map_unlock_read(map); else vm_map_unlock(map); + vm_map_entry_release(count); return (rv); } @@ -2822,7 +2987,8 @@ done: */ void -vmspace_exec(struct proc *p) { +vmspace_exec(struct proc *p) +{ struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; vm_map_t map = &p->p_vmspace->vm_map; @@ -2850,7 +3016,8 @@ vmspace_exec(struct proc *p) { */ void -vmspace_unshare(struct proc *p) { +vmspace_unshare(struct proc *p) +{ struct vmspace *oldvmspace = p->p_vmspace; struct vmspace *newvmspace; @@ -2863,7 +3030,6 @@ vmspace_unshare(struct proc *p) { if (p == curproc) pmap_activate(p); } - /* * vm_map_lookup: @@ -3077,15 +3243,14 @@ done: */ void -vm_map_lookup_done(map, entry) - vm_map_t map; - vm_map_entry_t entry; +vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count) { /* * Unlock the main-level map */ - vm_map_unlock_read(map); + if (count) + vm_map_entry_release(count); } /* @@ -3112,6 +3277,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) vm_pindex_t first_pindex, osize, oindex; off_t ooffset; int cnt; + int count; if (npages) *npages = 0; @@ -3122,13 +3288,15 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) while (cnt > 0) { map = mapa; + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); + if ((vm_map_lookup(&map, uaddr, VM_PROT_READ, &entry, &first_object, &first_pindex, &prot, &wired)) != KERN_SUCCESS) { return EFAULT; } - vm_map_clip_start(map, entry, uaddr); + vm_map_clip_start(map, entry, uaddr, &count); tcnt = cnt; tend = uaddr + tcnt; @@ -3137,7 +3305,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) tend = entry->end; } - vm_map_clip_end(map, entry, tend); + vm_map_clip_end(map, entry, tend, &count); start = entry->start; end = entry->end; @@ -3150,7 +3318,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) for (idx = 0; idx < osize; idx++) { vm_page_t m; if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { - vm_map_lookup_done(map, entry); + vm_map_lookup_done(map, entry, count); return 0; } /* @@ -3159,7 +3327,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) */ if ((m->flags & PG_BUSY) || ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { - vm_map_lookup_done(map, entry); + vm_map_lookup_done(map, entry, count); return 0; } } @@ -3290,12 +3458,13 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) } SAVE_HINT(map, entry->prev); - vm_map_entry_delete(map, entry); + vm_map_entry_delete(map, entry, &count); object = srcobject; ooffset = cp; - rv = vm_map_insert(map, object, ooffset, start, tend, + rv = vm_map_insert(map, &count, + object, ooffset, start, tend, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); if (rv != KERN_SUCCESS) @@ -3310,6 +3479,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) map->timestamp++; vm_map_unlock(map); + vm_map_entry_release(count); cnt -= tcnt; uaddr += tcnt; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 4bf724c5c6..b3a49fd9d5 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -62,7 +62,7 @@ * rights to redistribute these changes. 
* * $FreeBSD: src/sys/vm/vm_map.h,v 1.54.2.5 2003/01/13 22:51:17 dillon Exp $ - * $DragonFly: src/sys/vm/vm_map.h,v 1.6 2003/08/25 17:01:13 dillon Exp $ + * $DragonFly: src/sys/vm/vm_map.h,v 1.7 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -138,6 +138,13 @@ struct vm_map_entry { */ #define MAP_CLIP_NO_HOLES 0x0001 +/* + * This reserve count for vm_map_entry_reserve() should cover all nominal + * single-insertion operations, including any necessary clipping. + */ +#define MAP_RESERVE_COUNT 4 +#define MAP_RESERVE_SLOP 32 + static __inline u_char vm_map_entry_behavior(struct vm_map_entry *entry) { @@ -330,10 +337,12 @@ vmspace_resident_count(struct vmspace *vmspace) return pmap_resident_count(vmspace_pmap(vmspace)); } -/* XXX: number of kernel maps and entries to statically allocate */ +/* + * Number of kernel maps and entries to statically allocate, required + * during boot to bootstrap the VM system. + */ #define MAX_KMAP 10 -#define MAX_KMAPENT 128 -#define MAX_MAPENT 128 +#define MAX_MAPENT 256 /* * Copy-on-write flags for vm_map operations @@ -360,16 +369,20 @@ vmspace_resident_count(struct vmspace *vmspace) #ifdef _KERNEL boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t); struct pmap; +int vm_map_entry_reserve(int); +int vm_map_entry_kreserve(int); +void vm_map_entry_release(int); +void vm_map_entry_krelease(int); vm_map_t vm_map_create (struct pmap *, vm_offset_t, vm_offset_t); -int vm_map_delete (vm_map_t, vm_offset_t, vm_offset_t); +int vm_map_delete (vm_map_t, vm_offset_t, vm_offset_t, int *); int vm_map_find (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, boolean_t, vm_prot_t, vm_prot_t, int); int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t, vm_offset_t *); int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t); void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t); -int vm_map_insert (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int); +int vm_map_insert (vm_map_t, int *, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int); int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *); -void vm_map_lookup_done (vm_map_t, vm_map_entry_t); +void vm_map_lookup_done (vm_map_t, vm_map_entry_t, int); boolean_t vm_map_lookup_entry (vm_map_t, vm_offset_t, vm_map_entry_t *); int vm_map_pageable (vm_map_t, vm_offset_t, vm_offset_t, boolean_t); int vm_map_user_pageable (vm_map_t, vm_offset_t, vm_offset_t, boolean_t); @@ -379,13 +392,14 @@ int vm_map_remove (vm_map_t, vm_offset_t, vm_offset_t); void vm_map_startup (void); int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t); int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int); -void vm_map_simplify_entry (vm_map_t, vm_map_entry_t); +void vm_map_simplify_entry (vm_map_t, vm_map_entry_t, int *); void vm_init2 (void); int vm_uiomove (vm_map_t, vm_object_t, off_t, int, vm_offset_t, int *); void vm_freeze_copyopts (vm_object_t, vm_pindex_t, vm_pindex_t); int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_growstack (struct proc *p, vm_offset_t addr); int vmspace_swap_count (struct vmspace *vmspace); +void vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *); #endif #endif /* _VM_MAP_ */ diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 54f2d31404..12d3c74863 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -35,7 +35,7 @@ * * from: 
@(#)vm_page.c 7.4 (Berkeley) 5/7/91 * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $ - * $DragonFly: src/sys/vm/vm_page.c,v 1.8 2003/08/25 17:01:13 dillon Exp $ + * $DragonFly: src/sys/vm/vm_page.c,v 1.9 2003/08/27 01:43:08 dillon Exp $ */ /* @@ -1713,6 +1713,7 @@ contigmalloc1( vm_offset_t addr, phys, tmp_addr; int pass; vm_page_t pga = vm_page_array; + int count; size = round_page(size); if (size == 0) @@ -1848,6 +1849,7 @@ again1: * return kernel VM pointer. */ vm_map_lock(map); + count = vm_map_entry_reserve(MAP_RESERVE_COUNT); if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr) != KERN_SUCCESS) { /* @@ -1856,13 +1858,16 @@ again1: * above available. */ vm_map_unlock(map); + vm_map_entry_release(count); splx(s); return (NULL); } vm_object_reference(kernel_object); - vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS, + vm_map_insert(map, &count, + kernel_object, addr - VM_MIN_KERNEL_ADDRESS, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); + vm_map_entry_release(count); tmp_addr = addr; for (i = start; i < (start + size / PAGE_SIZE); i++) { diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c index 320db7701b..635afcf3a6 100644 --- a/sys/vm/vm_zone.c +++ b/sys/vm/vm_zone.c @@ -12,7 +12,7 @@ * John S. Dyson. * * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $ - * $DragonFly: src/sys/vm/vm_zone.c,v 1.8 2003/08/25 19:50:33 dillon Exp $ + * $DragonFly: src/sys/vm/vm_zone.c,v 1.9 2003/08/27 01:43:08 dillon Exp $ */ #include @@ -167,7 +167,10 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size, /* * If we cannot wait, allocate KVA space up front, and we will fill - * in pages as needed. + * in pages as needed. This is particularly required when creating + * an allocation space for map entries in kernel_map, because we + * do not want to go into a recursion deadlock with + * vm_map_entry_reserve(). */ if (z->zflags & ZONE_INTERRUPT) { @@ -395,6 +398,12 @@ zget(vm_zone_t z) item = NULL; } + /* + * Recover any reserve missing due to a zalloc/kreserve/krelease + * recursion. + */ + vm_map_entry_reserve(0); + return item; } diff --git a/sys/vm/vm_zone.h b/sys/vm/vm_zone.h index 09f3a6f2a3..19112d0765 100644 --- a/sys/vm/vm_zone.h +++ b/sys/vm/vm_zone.h @@ -12,7 +12,7 @@ * John S. Dyson. * * $FreeBSD: src/sys/vm/vm_zone.h,v 1.13.2.2 2002/10/10 19:50:16 dillon Exp $ - * $DragonFly: src/sys/vm/vm_zone.h,v 1.4 2003/08/20 08:03:01 rob Exp $ + * $DragonFly: src/sys/vm/vm_zone.h,v 1.5 2003/08/27 01:43:08 dillon Exp $ */ #ifndef _SYS_ZONE_H @@ -22,6 +22,7 @@ #define ZONE_INTERRUPT 0x0001 /* If you need to allocate at int time */ #define ZONE_PANICFAIL 0x0002 /* panic if the zalloc fails */ #define ZONE_BOOT 0x0010 /* Internal flag used by zbootinit */ +#define ZONE_USE_RESERVE 0x0020 /* use reserve memory if necessary */ #ifndef _SYS_THREAD_H_ #include -- 2.41.0
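
For illustration only (not part of the commit): a hypothetical kernel KVA allocator written against the new calling convention would look roughly like the sketch below. The function name example_kva_alloc() is invented; vm_map_entry_reserve()/vm_map_entry_release(), MAP_RESERVE_COUNT, and the vm_map_findspace()/vm_map_insert() signatures are the ones introduced by this patch. Page allocation and wiring are omitted, only the reservation pattern is shown: reserve entries before taking the map lock (where zalloc() is still safe), thread the count through the map operations, and return whatever was not consumed after unlocking.

vm_offset_t
example_kva_alloc(vm_map_t map, vm_size_t size)
{
    vm_offset_t addr;
    int count;

    size = round_page(size);

    /*
     * Reserve map entries up front; this may zalloc(), so it is done
     * before the map is locked.
     */
    count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

    vm_map_lock(map);
    if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr)) {
        vm_map_unlock(map);
        vm_map_entry_release(count);
        return (0);
    }
    vm_object_reference(kernel_object);
    vm_map_insert(map, &count, kernel_object,
                  addr - VM_MIN_KERNEL_ADDRESS,
                  addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
    vm_map_unlock(map);

    /*
     * Return whatever part of the reservation vm_map_insert() did not
     * consume.
     */
    vm_map_entry_release(count);
    return (addr);
}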
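
The interaction between the normal and kernel reservation paths can also be modeled outside the kernel. The following user-space sketch is a simplified, single-cpu approximation of the gd_vme_base / gd_vme_avail / gd_vme_kdeficit bookkeeping, with malloc() standing in for zalloc() and the MAP_RESERVE_SLOP trimming omitted. It only illustrates the accounting that avoids the reserve->zalloc->kmem_alloc recursion; it is not kernel code.

#include <assert.h>
#include <stdlib.h>

struct entry { struct entry *next; };

static struct entry *base;  /* models gd_vme_base (per-cpu free list) */
static int avail;           /* models gd_vme_avail                    */
static int kdeficit;        /* models gd_vme_kdeficit                 */

/* Normal path: may allocate to back the reservation. */
static int
reserve(int count)
{
    avail -= count;
    while (avail < 0) {
        struct entry *e = malloc(sizeof(*e));  /* stands in for zalloc() */
        assert(e != NULL);
        e->next = base;
        base = e;
        ++avail;
    }
    return (count);
}

/* Normal path: hand back what was not consumed. */
static void
release(int count)
{
    avail += count;
}

/* Kernel path: must not allocate, only record the debt. */
static int
kreserve(int count)
{
    kdeficit += count;
    assert(base != NULL);  /* pre-loaded, cf. kmem_cpu_init() */
    return (count);
}

/*
 * Kernel path: determine how many entries were actually consumed and
 * charge them against avail. avail may go negative here; it is
 * recovered later by a normal reserve() call (cf. the
 * vm_map_entry_reserve(0) added to zget()).
 */
static void
krelease(int count)
{
    kdeficit -= count;
    avail -= kdeficit;
    kdeficit = 0;
}

int
main(void)
{
    int c;

    reserve(8);             /* analogue of kmem_cpu_init(): pre-load 'base' */

    c = kreserve(4);        /* kernel_map operation begins                  */
    base = base->next;      /* one entry consumed, as vm_map_entry_create() */
    --c;                    /* would do (the entry now "lives in the map")  */
    krelease(c);            /* debt of 1 charged against 'avail'            */
    assert(avail == -1);

    reserve(0);             /* zget()-style recovery: refill via malloc()   */
    assert(avail == 0);
    return (0);
}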