kernel - Implement a contiguous memory reserve for contigmalloc()
author	Matthew Dillon <dillon@apollo.backplane.com>
Sat, 19 Nov 2011 05:04:00 +0000 (21:04 -0800)
committer	Matthew Dillon <dillon@apollo.backplane.com>
Sat, 19 Nov 2011 05:04:00 +0000 (21:04 -0800)
* We initially reserve the lower 1/4 of physical memory or ~256MB (65536
  pages), whichever is smaller.  The ALIST API is used to manage the
  reserve, as sketched below.
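
  A minimal userland sketch of the sizing rule, assuming 4K pages (the
  65536-page cap and the total/4 clamp are taken from the vm_page.c hunk
  below; this is illustration only, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT      12      /* assumes 4K pages */

    static uint64_t
    initial_dma_reserve(uint64_t total_bytes)
    {
            /* cap at 65536 pages (~256MB) ... */
            uint64_t reserve = (uint64_t)65536 << PAGE_SHIFT;

            /* ... but never more than 1/4 of physical memory */
            if (reserve > total_bytes / 4)
                    reserve = total_bytes / 4;
            return (reserve);
    }

    int
    main(void)
    {
            /* a 512MB machine reserves 128MB; a 4GB machine hits the cap */
            printf("%juMB\n",
                   (uintmax_t)(initial_dma_reserve(512ULL << 20) >> 20));
            printf("%juMB\n",
                   (uintmax_t)(initial_dma_reserve(4ULL << 30) >> 20));
            return (0);
    }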

* Once device initialization is complete, and before init is executed,
  we reduce the reserve and return the excess pages to the normal VM
  paging queues, as sketched below.  The reserve is reduced to ~16MB or
  1/16 of total memory, whichever is smaller.
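
  The trimming loop hands the excess back in power-of-2 sized runs taken
  from the end of the largest free run (presumably because the alist
  allocator works in power-of-2 chunks).  A standalone sketch of that run
  selection, mirroring vm_page_startup_finish() in the vm_page.c hunk
  below (illustration only):

    #include <stdio.h>

    /*
     * Given the largest free run [blk, blk+count) and the excess page
     * count, pick a power-of-2 sized tail of the run to release.
     */
    static unsigned int
    pick_release_run(unsigned int blk, unsigned int count,
                     unsigned int excess, unsigned int *rcount)
    {
            unsigned int xcount;

            if (count > excess) {           /* only release the excess */
                    blk += count - excess;
                    count = excess;
            }
            for (xcount = 1; xcount <= count; xcount <<= 1)
                    ;                       /* largest power of 2 <= count */
            xcount >>= 1;
            blk += count - xcount;
            *rcount = xcount;
            return (blk);
    }

    int
    main(void)
    {
            unsigned int n;
            unsigned int b = pick_release_run(100, 300, 200, &n);

            printf("release %u pages at block %u\n", n, b);  /* 128 at 272 */
            return (0);
    }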

* This can be adjusted with the 'vm.dma_reserved' tunable (example below).
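
  For example, to keep a 32MB reserve after boot instead of the default
  (the value is hypothetical; per the TUNABLE_ULONG/SYSCTL_ULONG
  declarations in the vm_page.c hunk the tunable is read at boot time, in
  bytes, and is read-only via sysctl afterwards):

    # /boot/loader.conf
    vm.dma_reserved="33554432"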

* contigmalloc() now tries the DMA reserve first.  If that fails it falls
  back to the original contigmalloc() code.  contigfree() determines whether
  the pages belong to the DMA reserve and either returns them to the
  reserve or frees them to the normal paging queues, as appropriate.

  VM pages in the reserve are left wired and not busy, and they are returned
  to the reserve in the same state.  This greatly simplifies operations that
  act on the reserve.
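
  A standalone simulation of the free-side routing (the real code is
  vm_page_free_contig() in the vm_page.c hunk below; all names here are
  stand-ins, not kernel APIs).  Ownership is decided purely by physical
  address, so contigfree() needs no record of which allocator satisfied
  the request:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t low_phys_reserved = 16ULL << 20;  /* pretend 16MB reserve */

    static void
    free_contig(uint64_t pa, unsigned long npages)
    {
            if (pa < low_phys_reserved) {
                    /* from the reserve: stays wired, back to the alist */
                    printf("return %lu wired pages @%#jx to the reserve\n",
                           npages, (uintmax_t)pa);
            } else {
                    /* regular pages: unwire and free each one to PQ_FREE */
                    printf("unwire and free %lu pages @%#jx\n",
                           npages, (uintmax_t)pa);
            }
    }

    int
    main(void)
    {
            free_contig(1ULL << 20, 4);     /* inside the reserve */
            free_contig(1ULL << 30, 4);     /* regular memory */
            return (0);
    }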

* Fix various bits of code that contigmalloc()'d but then kfree()'d instead
  of contigfree()'d.
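
  In outline the fix looks like this (a kernel-style fragment condensed
  from the dpt_scsi.c hunk below, not a standalone program; 'len' and the
  trailing contigmalloc() arguments are stand-ins for the driver's real
  values):

    size_t contigsize = 0;
    void   *data;

    if (len > PAGE_SIZE) {
            contigsize = len;               /* remember it was contigmalloc'd */
            data = contigmalloc(len, M_TEMP, M_WAITOK, 0, ~0, len, 0);
    } else {
            data = kmalloc(len, M_TEMP, M_WAITOK);
    }

    /* ... use the buffer ... */

    if (data != NULL) {
            if (contigsize)
                    contigfree(data, contigsize, M_TEMP);   /* was kfree() */
            else
                    kfree(data, M_TEMP);
    }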

sys/dev/disk/ncr/ncr.c
sys/dev/raid/dpt/dpt_scsi.c
sys/platform/pc32/i386/machdep.c
sys/platform/pc64/x86_64/machdep.c
sys/sys/vmmeter.h
sys/vm/vm_contig.c
sys/vm/vm_extern.h
sys/vm/vm_page.c
sys/vm/vm_page.h

diff --git a/sys/dev/disk/ncr/ncr.c b/sys/dev/disk/ncr/ncr.c
index 27ebc7a..1fc5476 100644
@@ -3591,7 +3591,7 @@ ncr_attach (device_t dev)
                np->bst2 = rman_get_bustag(np->sram_res);
                np->bsh2 = rman_get_bushandle(np->sram_res);
        } else if (sizeof (struct script) > PAGE_SIZE) {
-               np->script  = (struct script*) vm_page_alloc_contig 
+               np->script  = (struct script*) kmem_alloc_contig
                        (round_page(sizeof (struct script)), 
                         0, 0xffffffff, PAGE_SIZE);
        } else {
@@ -3601,7 +3601,7 @@ ncr_attach (device_t dev)
 
        /* XXX JGibbs - Use contigmalloc */
        if (sizeof (struct scripth) > PAGE_SIZE) {
-               np->scripth = (struct scripth*) vm_page_alloc_contig 
+               np->scripth = (struct scripth*) kmem_alloc_contig
                        (round_page(sizeof (struct scripth)), 
                         0, 0xffffffff, PAGE_SIZE);
        } else 
diff --git a/sys/dev/raid/dpt/dpt_scsi.c b/sys/dev/raid/dpt/dpt_scsi.c
index 5a22ef9..63a7056 100644
@@ -2230,6 +2230,7 @@ dpt_user_cmd(dpt_softc_t * dpt, eata_pt_t * user_cmd,
        int        huh;
        int        result;
        int        submitted;
+       size_t     contigsize = 0;
 
        data = NULL;
        channel = minor2hba(minor_no);
@@ -2289,6 +2290,7 @@ dpt_user_cmd(dpt_softc_t * dpt, eata_pt_t * user_cmd,
        if (ccb->eata_ccb.DataIn || ccb->eata_ccb.DataOut) {
                /* Data I/O is involved in this command.  Alocate buffer */
                if (ccb->eata_ccb.cp_datalen > PAGE_SIZE) {
+                       contigsize = ccb->eata_ccb.cp_datalen;
                        data = contigmalloc(ccb->eata_ccb.cp_datalen,
                                            M_TEMP, M_WAITOK, 0, ~0,
                                            ccb->eata_ccb.cp_datalen,
@@ -2322,8 +2324,12 @@ dpt_user_cmd(dpt_softc_t * dpt, eata_pt_t * user_cmd,
        if (ccb->eata_ccb.cp_datalen != 0) {
                if (dpt_scatter_gather(dpt, ccb, ccb->eata_ccb.cp_datalen,
                                       data) != 0) {
-                       if (data != NULL)
-                               kfree(data, M_TEMP);
+                       if (data != NULL) {
+                               if (contigsize)
+                                       contigfree(data, contigsize, M_TEMP);
+                               else
+                                       kfree(data, M_TEMP);
+                       }
                        return (EFAULT);
                }
        }
@@ -2389,8 +2395,12 @@ dpt_user_cmd(dpt_softc_t * dpt, eata_pt_t * user_cmd,
        (void) tsleep((void *) ccb, PCATCH, "dptucw", 100 * hz);
 
        /* Free allocated memory */
-       if (data != NULL)
-               kfree(data, M_TEMP);
+       if (data != NULL) {
+               if (contigsize)
+                       contigfree(data, contigsize, M_TEMP);
+               else
+                       kfree(data, M_TEMP);
+       }
 
        return (0);
 }
diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c
index 5f52b84..d7844f1 100644
@@ -361,8 +361,9 @@ again:
 #endif
 
        kprintf("avail memory = %ju (%ju MB)\n",
-               (intmax_t)ptoa(vmstats.v_free_count),
-               (intmax_t)ptoa(vmstats.v_free_count) / 1024 / 1024);
+               (intmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages),
+               (intmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages) /
+               1024 / 1024);
 
        /*
         * Set up buffers, so they can be used to read disk labels.
diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c
index 74bbd53..193c5c1 100644
@@ -383,8 +383,9 @@ again:
 #endif
 
        kprintf("avail memory = %ju (%ju MB)\n",
-               (uintmax_t)ptoa(vmstats.v_free_count),
-               (uintmax_t)ptoa(vmstats.v_free_count) / 1024 / 1024);
+               (uintmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages),
+               (uintmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages) /
+               1024 / 1024);
 
        /*
         * Set up buffers, so they can be used to read disk labels.
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index db5e67e..220cf47 100644
@@ -132,6 +132,9 @@ struct vmstats {
        u_int v_pageout_free_min;   /* min number pages reserved for kernel */
        u_int v_interrupt_free_min; /* reserved number of pages for int code */
        u_int v_free_severe;    /* severe depletion of pages below this pt */
+       u_int v_dma_avail;      /* free dma-reserved pages */
+       u_int v_dma_pages;      /* total dma-reserved pages */
+       u_int v_unused[8];
 };
 
 #ifdef _KERNEL
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index c7f3edc..f249db0 100644
@@ -193,6 +193,10 @@ vm_contig_pg_clean(int queue, int count)
                        vm_page_wakeup(m);
                        continue;
                }
+               if (m->wire_count || m->hold_count) {
+                       vm_page_wakeup(m);
+                       continue;
+               }
                if ((object = m->object) == NULL) {
                        vm_page_wakeup(m);
                        continue;
@@ -258,7 +262,14 @@ vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
        if ((boundary & (boundary - 1)) != 0)
                panic("vm_contig_pg_alloc: boundary must be a power of 2");
 
-       start = 0;
+       /*
+        * See if we can get the pages from the contiguous page reserve
+        * alist.  The returned pages will be allocated and wired but not
+        * busied.
+        */
+       m = vm_page_alloc_contig(low, high, alignment, boundary, size);
+       if (m)
+               return (m - &pga[0]);
 
        /*
         * Three passes (0, 1, 2).  Each pass scans the VM page list for
@@ -266,6 +277,7 @@ vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
         * we attempt to flush inactive pages and reset the start index back
         * to 0.  For passes 1 and 2 we also attempt to flush active pages.
         */
+       start = 0;
        for (pass = 0; pass < 3; pass++) {
                /*
                 * Find first page in array that is free, within range, 
@@ -355,7 +367,7 @@ again:
                }
 
                /*
-                * Try to allocate the pages.
+                * Try to allocate the pages, wiring them as we go.
                 *
                 * (still in critical section)
                 */
@@ -405,6 +417,7 @@ again:
                         */
                        vm_page_flag_clear(m, ~(PG_BUSY | PG_SBUSY |
                                                PG_ZERO | PG_WANTED));
+                       vm_page_wire(m);
                        vm_page_wakeup(m);
                }
 
@@ -434,18 +447,18 @@ static void
 vm_contig_pg_free(int start, u_long size)
 {
        vm_page_t pga = vm_page_array;
-       vm_page_t m;
-       int i;
        
        size = round_page(size);
        if (size == 0)
                panic("vm_contig_pg_free: size must not be 0");
 
-       for (i = start; i < (start + size / PAGE_SIZE); i++) {
-               m = &pga[i];
-               vm_page_busy_wait(m, FALSE, "cpgfr");
-               vm_page_free(m);
-       }
+       /*
+        * The pages are wired, vm_page_free_contig() determines whether they
+        * belong to the contig space or not and either frees them to that
+        * space (leaving them wired), or unwires the page and frees it to the
+        * normal PQ_FREE queue.
+        */
+       vm_page_free_contig(&pga[start], size);
 }
 
 /*
@@ -460,65 +473,24 @@ vm_contig_pg_free(int start, u_long size)
 static vm_offset_t
 vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags)
 {
-       vm_offset_t addr, tmp_addr;
+       vm_offset_t addr;
+       vm_paddr_t pa;
        vm_page_t pga = vm_page_array;
-       int i, count;
+       u_long offset;
 
-       size = round_page(size);
        if (size == 0)
                panic("vm_contig_pg_kmap: size must not be 0");
-
-       /*
-        * We've found a contiguous chunk that meets our requirements.
-        * Allocate KVM, and assign phys pages and return a kernel VM
-        * pointer.
-        */
-       count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
-       vm_map_lock(map);
-       if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr) !=
-           KERN_SUCCESS) {
-               /*
-                * XXX We almost never run out of kernel virtual
-                * space, so we don't make the allocated memory
-                * above available.
-                */
-               vm_map_unlock(map);
-               vm_map_entry_release(count);
-               return (0);
+       size = round_page(size);
+       addr = kmem_alloc_pageable(&kernel_map, size);
+       if (addr) {
+               pa = VM_PAGE_TO_PHYS(&pga[start]);
+               for (offset = 0; offset < size; offset += PAGE_SIZE)
+                       pmap_kenter_quick(addr + offset, pa + offset);
+               smp_invltlb();
+               if (flags & M_ZERO)
+                       bzero((void *)addr, size);
        }
-
-       /*
-        * kernel_object maps 1:1 to kernel_map.
-        */
-       vm_object_hold(&kernel_object);
-       vm_object_reference_locked(&kernel_object);
-       vm_map_insert(map, &count, 
-                     &kernel_object, addr,
-                     addr, addr + size,
-                     VM_MAPTYPE_NORMAL,
-                     VM_PROT_ALL, VM_PROT_ALL,
-                     0);
-       vm_map_unlock(map);
-       vm_map_entry_release(count);
-
-       tmp_addr = addr;
-       for (i = start; i < (start + size / PAGE_SIZE); i++) {
-               vm_page_t m = &pga[i];
-               if (vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr)) ==
-                   FALSE) {
-                       panic("vm_contig_pg_kmap: page already exists @%p",
-                             (void *)(intptr_t)tmp_addr);
-               }
-               if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
-                       pmap_zero_page(VM_PAGE_TO_PHYS(m));
-               m->flags = 0;
-               tmp_addr += PAGE_SIZE;
-       }
-       vm_map_wire(map, addr, addr + size, 0);
-
-       vm_object_drop(&kernel_object);
-
-       return (addr);
+       return(addr);
 }
 
 /*
@@ -542,15 +514,10 @@ contigmalloc(
  * No requirements.
  */
 void *
-contigmalloc_map(
-       unsigned long size,     /* should be size_t here and for malloc() */
-       struct malloc_type *type,
-       int flags,
-       vm_paddr_t low,
-       vm_paddr_t high,
-       unsigned long alignment,
-       unsigned long boundary,
-       vm_map_t map)
+contigmalloc_map(unsigned long size, struct malloc_type *type,
+                int flags, vm_paddr_t low, vm_paddr_t high,
+                unsigned long alignment, unsigned long boundary,
+                vm_map_t map)
 {
        int index;
        void *rv;
@@ -577,18 +544,27 @@ contigmalloc_map(
 void
 contigfree(void *addr, unsigned long size, struct malloc_type *type)
 {
+       vm_paddr_t pa;
+       vm_page_t m;
+
+       if (size == 0)
+               panic("vm_contig_pg_kmap: size must not be 0");
+       size = round_page(size);
+
+       pa = pmap_extract(&kernel_pmap, (vm_offset_t)addr);
+       pmap_qremove((vm_offset_t)addr, size / PAGE_SIZE);
        kmem_free(&kernel_map, (vm_offset_t)addr, size);
+
+       m = PHYS_TO_VM_PAGE(pa);
+       vm_page_free_contig(m, size);
 }
 
 /*
  * No requirements.
  */
 vm_offset_t
-vm_page_alloc_contig(
-       vm_offset_t size,
-       vm_paddr_t low,
-       vm_paddr_t high,
-       vm_offset_t alignment)
+kmem_alloc_contig(vm_offset_t size, vm_paddr_t low, vm_paddr_t high,
+                 vm_offset_t alignment)
 {
        return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low,
                                high, alignment, 0ul, &kernel_map));
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 1664ea7..42149f6 100644
@@ -102,7 +102,7 @@ void vm_wait_nominal (void);
 void vm_init_limits(struct proc *);
 
 int vm_mmap (vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, void *, vm_ooffset_t);
-vm_offset_t vm_page_alloc_contig (vm_offset_t, vm_paddr_t, vm_paddr_t, vm_offset_t);
+vm_offset_t kmem_alloc_contig (vm_offset_t, vm_paddr_t, vm_paddr_t, vm_offset_t);
 void vm_set_page_size (void);
 struct vmspace *vmspace_alloc (vm_offset_t, vm_offset_t);
 struct vmspace *vmspace_fork (struct vmspace *);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index c28f5c7..74eb715 100644
@@ -73,6 +73,8 @@
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/kernel.h>
+#include <sys/alist.h>
+#include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -111,6 +113,16 @@ LIST_HEAD(vm_page_action_list, vm_page_action);
 struct vm_page_action_list     action_list[VMACTION_HSIZE];
 static volatile int vm_pages_waiting;
 
+static struct alist vm_contig_alist;
+static struct almeta vm_contig_ameta[ALIST_RECORDS_65536];
+static struct spinlock vm_contig_spin = SPINLOCK_INITIALIZER(&vm_contig_spin);
+
+static u_long vm_dma_reserved = 0;
+TUNABLE_ULONG("vm.dma_reserved", &vm_dma_reserved);
+SYSCTL_ULONG(_vm, OID_AUTO, dma_reserved, CTLFLAG_RD, &vm_dma_reserved, 0,
+           "Memory reserved for DMA");
+SYSCTL_UINT(_vm, OID_AUTO, dma_free_pages, CTLFLAG_RD,
+           &vm_contig_alist.bl_free, 0, "Memory reserved for DMA");
 
 RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare,
             vm_pindex_t, pindex);
@@ -147,7 +159,8 @@ vm_page_queue_init(void)
 long first_page = 0;
 int vm_page_array_size = 0;
 int vm_page_zero_count = 0;
-vm_page_t vm_page_array = 0;
+vm_page_t vm_page_array = NULL;
+vm_paddr_t vm_low_phys_reserved;
 
 /*
  * (low level boot)
@@ -177,7 +190,7 @@ vm_set_page_size(void)
  *
  * Must be called in a critical section.
  */
-static vm_page_t
+static void
 vm_add_new_page(vm_paddr_t pa)
 {
        struct vpgqueues *vpq;
@@ -195,6 +208,22 @@ vm_add_new_page(vm_paddr_t pa)
        m->pc ^= ((pa >> PAGE_SHIFT) / PQ_L2_SIZE) & PQ_L2_MASK;
        m->pc ^= ((pa >> PAGE_SHIFT) / (PQ_L2_SIZE * PQ_L2_SIZE)) & PQ_L2_MASK;
 #endif
+       /*
+        * Reserve a certain number of contiguous low memory pages for
+        * contigmalloc() to use.
+        */
+       if (pa < vm_low_phys_reserved) {
+               atomic_add_int(&vmstats.v_page_count, 1);
+               atomic_add_int(&vmstats.v_dma_pages, 1);
+               m->queue = PQ_NONE;
+               m->wire_count = 1;
+               alist_free(&vm_contig_alist, pa >> PAGE_SHIFT, 1);
+               return;
+       }
+
+       /*
+        * General page
+        */
        m->queue = m->pc + PQ_FREE;
        KKASSERT(m->dirty == 0);
 
@@ -211,8 +240,6 @@ vm_add_new_page(vm_paddr_t pa)
        }
        ++vpq->flipflop;
        ++vpq->lcnt;
-
-       return (m);
 }
 
 /*
@@ -275,7 +302,6 @@ vm_page_startup(void)
         * Initialize the queue headers for the free queue, the active queue
         * and the inactive queue.
         */
-
        vm_page_queue_init();
 
 #if !defined(_KERNEL_VIRTUAL)
@@ -300,7 +326,6 @@ vm_page_startup(void)
            VM_PROT_READ | VM_PROT_WRITE);
        bzero((void *)vm_page_dump, vm_page_dump_size);
 #endif
-
        /*
         * Compute the number of pages of memory that will be available for
         * use (taking into account the overhead of a page structure per
@@ -310,13 +335,36 @@ vm_page_startup(void)
        page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page;
        npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE;
 
+#ifndef _KERNEL_VIRTUAL
+       /*
+        * (only applies to real kernels)
+        *
+        * Initialize the contiguous reserve map.  We initially reserve up
+        * to 1/4 available physical memory or 65536 pages (~256MB), whichever
+        * is lower.
+        *
+        * Once device initialization is complete we return most of the
+        * reserved memory back to the normal page queues but leave some
+        * in reserve for things like usb attachments.
+        */
+       vm_low_phys_reserved = (vm_paddr_t)65536 << PAGE_SHIFT;
+       if (vm_low_phys_reserved > total / 4)
+               vm_low_phys_reserved = total / 4;
+       if (vm_dma_reserved == 0) {
+               vm_dma_reserved = 16 * 1024 * 1024;     /* 16MB */
+               if (vm_dma_reserved > total / 16)
+                       vm_dma_reserved = total / 16;
+       }
+#endif
+       alist_init(&vm_contig_alist, 65536, vm_contig_ameta,
+                  ALIST_RECORDS_65536);
+
        /*
         * Initialize the mem entry structures now, and put them in the free
         * queue.
         */
        new_end = trunc_page(end - page_range * sizeof(struct vm_page));
-       mapped = pmap_map(&vaddr, new_end, end,
-           VM_PROT_READ | VM_PROT_WRITE);
+       mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE);
        vm_page_array = (vm_page_t)mapped;
 
 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
@@ -361,6 +409,96 @@ vm_page_startup(void)
 }
 
 /*
+ * We tended to reserve a ton of memory for contigmalloc().  Now that most
+ * drivers have initialized we want to return most the remaining free
+ * reserve back to the VM page queues so they can be used for normal
+ * allocations.
+ *
+ * We leave vm_dma_reserved bytes worth of free pages in the reserve pool.
+ */
+static void
+vm_page_startup_finish(void *dummy __unused)
+{
+       alist_blk_t blk;
+       alist_blk_t rblk;
+       alist_blk_t count;
+       alist_blk_t xcount;
+       alist_blk_t bfree;
+       vm_page_t m;
+
+       spin_lock(&vm_contig_spin);
+       for (;;) {
+               bfree = alist_free_info(&vm_contig_alist, &blk, &count);
+               if (bfree <= vm_dma_reserved / PAGE_SIZE)
+                       break;
+               if (count == 0)
+                       break;
+
+               /*
+                * Figure out how much of the initial reserve we have to
+                * free in order to reach our target.
+                */
+               bfree -= vm_dma_reserved / PAGE_SIZE;
+               if (count > bfree) {
+                       blk += count - bfree;
+                       count = bfree;
+               }
+
+               /*
+                * Calculate the nearest power of 2 <= count.
+                */
+               for (xcount = 1; xcount <= count; xcount <<= 1)
+                       ;
+               xcount >>= 1;
+               blk += count - xcount;
+               count = xcount;
+
+               /*
+                * Allocate the pages from the alist, then free them to
+                * the normal VM page queues.
+                *
+                * Pages allocated from the alist are wired.  We have to
+                * busy, unwire, and free them.  We must also adjust
+                * vm_low_phys_reserved before freeing any pages to prevent
+                * confusion.
+                */
+               rblk = alist_alloc(&vm_contig_alist, blk, count);
+               if (rblk != blk) {
+                       kprintf("vm_page_startup_finish: Unable to return "
+                               "dma space @0x%08x/%d -> 0x%08x\n",
+                               blk, count, rblk);
+                       break;
+               }
+               atomic_add_int(&vmstats.v_dma_pages, -count);
+               spin_unlock(&vm_contig_spin);
+
+               m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT);
+               vm_low_phys_reserved = VM_PAGE_TO_PHYS(m);
+               while (count) {
+                       vm_page_busy_wait(m, FALSE, "cpgfr");
+                       vm_page_unwire(m, 0);
+                       vm_page_free(m);
+                       --count;
+                       ++m;
+               }
+               spin_lock(&vm_contig_spin);
+       }
+       spin_unlock(&vm_contig_spin);
+
+       /*
+        * Print out how much DMA space drivers have already allocated and
+        * how much is left over.
+        */
+       kprintf("DMA space used: %jdk, remaining available: %jdk\n",
+               (intmax_t)(vmstats.v_dma_pages - vm_contig_alist.bl_free) *
+               (PAGE_SIZE / 1024),
+               (intmax_t)vm_contig_alist.bl_free * (PAGE_SIZE / 1024));
+}
+SYSINIT(vm_pgend, SI_SUB_PROC0_POST, SI_ORDER_ANY,
+       vm_page_startup_finish, NULL)
+
+
+/*
  * Scan comparison function for Red-Black tree scans.  An inclusive
  * (start,end) is expected.  Other fields are not used.
  */
@@ -1523,6 +1661,88 @@ done:
 }
 
 /*
+ * Attempt to allocate contiguous physical memory with the specified
+ * requirements.
+ */
+vm_page_t
+vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high,
+                    unsigned long alignment, unsigned long boundary,
+                    unsigned long size)
+{
+       alist_blk_t blk;
+
+       alignment >>= PAGE_SHIFT;
+       if (alignment == 0)
+               alignment = 1;
+       boundary >>= PAGE_SHIFT;
+       if (boundary == 0)
+               boundary = 1;
+       size = (size + PAGE_MASK) >> PAGE_SHIFT;
+
+       spin_lock(&vm_contig_spin);
+       blk = alist_alloc(&vm_contig_alist, 0, size);
+       if (blk == ALIST_BLOCK_NONE) {
+               spin_unlock(&vm_contig_spin);
+               if (bootverbose) {
+                       kprintf("vm_page_alloc_contig: %ldk nospace\n",
+                               (size + PAGE_MASK) * (PAGE_SIZE / 1024));
+               }
+               return(NULL);
+       }
+       if (high && ((vm_paddr_t)(blk + size) << PAGE_SHIFT) > high) {
+               alist_free(&vm_contig_alist, blk, size);
+               spin_unlock(&vm_contig_spin);
+               if (bootverbose) {
+                       kprintf("vm_page_alloc_contig: %ldk high "
+                               "%016jx failed\n",
+                               (size + PAGE_MASK) * (PAGE_SIZE / 1024),
+                               (intmax_t)high);
+               }
+               return(NULL);
+       }
+       spin_unlock(&vm_contig_spin);
+       if (bootverbose) {
+               kprintf("vm_page_alloc_contig: %016jx/%ldk\n",
+                       (intmax_t)(vm_paddr_t)blk << PAGE_SHIFT,
+                       (size + PAGE_MASK) * (PAGE_SIZE / 1024));
+       }
+       return (PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT));
+}
+
+/*
+ * Free contiguously allocated pages.  The pages will be wired but not busy.
+ * When freeing to the alist we leave them wired and not busy.
+ */
+void
+vm_page_free_contig(vm_page_t m, unsigned long size)
+{
+       vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
+       vm_pindex_t start = pa >> PAGE_SHIFT;
+       vm_pindex_t pages = (size + PAGE_MASK) >> PAGE_SHIFT;
+
+       if (bootverbose) {
+               kprintf("vm_page_free_contig:  %016jx/%ldk\n",
+                       (intmax_t)pa, size / 1024);
+       }
+       if (pa < vm_low_phys_reserved) {
+               KKASSERT(pa + size <= vm_low_phys_reserved);
+               spin_lock(&vm_contig_spin);
+               alist_free(&vm_contig_alist, start, pages);
+               spin_unlock(&vm_contig_spin);
+       } else {
+               while (pages) {
+                       vm_page_busy_wait(m, FALSE, "cpgfr");
+                       vm_page_unwire(m, 0);
+                       vm_page_free(m);
+                       --pages;
+                       ++m;
+               }
+
+       }
+}
+
+
+/*
  * Wait for sufficient free memory for nominal heavy memory use kernel
  * operations.
  */
@@ -1713,10 +1933,10 @@ vm_page_free_toq(vm_page_t m)
        KKASSERT(m->flags & PG_BUSY);
 
        if (m->busy || ((m->queue - m->pc) == PQ_FREE)) {
-               kprintf(
-               "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
-                   (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0,
-                   m->hold_count);
+               kprintf("vm_page_free: pindex(%lu), busy(%d), "
+                       "PG_BUSY(%d), hold(%d)\n",
+                       (u_long)m->pindex, m->busy,
+                       ((m->flags & PG_BUSY) ? 1 : 0), m->hold_count);
                if ((m->queue - m->pc) == PQ_FREE)
                        panic("vm_page_free: freeing free page");
                else
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 41d6c1f..a985cdc 100644
@@ -438,6 +438,9 @@ void vm_page_unhold(vm_page_t);
 void vm_page_activate (vm_page_t);
 void vm_page_pcpu_cache(void);
 vm_page_t vm_page_alloc (struct vm_object *, vm_pindex_t, int);
+vm_page_t vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high,
+                     unsigned long alignment, unsigned long boundary,
+                    unsigned long size);
 vm_page_t vm_page_grab (struct vm_object *, vm_pindex_t, int);
 void vm_page_cache (vm_page_t);
 int vm_page_try_to_cache (vm_page_t);
@@ -447,9 +450,12 @@ void vm_page_deactivate (vm_page_t);
 void vm_page_deactivate_locked (vm_page_t);
 int vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t);
 vm_page_t vm_page_lookup (struct vm_object *, vm_pindex_t);
-vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *, vm_pindex_t,
-                               int, const char * VM_PAGE_DEBUG_ARGS);
-vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *, vm_pindex_t, int, int * VM_PAGE_DEBUG_ARGS);
+vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(
+               struct vm_object *, vm_pindex_t, int, const char *
+               VM_PAGE_DEBUG_ARGS);
+vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(
+               struct vm_object *, vm_pindex_t, int, int *
+               VM_PAGE_DEBUG_ARGS);
 void vm_page_remove (vm_page_t);
 void vm_page_rename (vm_page_t, struct vm_object *, vm_pindex_t);
 void vm_page_startup (void);
@@ -471,6 +477,7 @@ int vm_page_bits (int, int);
 vm_page_t vm_page_list_find(int basequeue, int index, boolean_t prefer_zero);
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_toq(vm_page_t m);
+void vm_page_free_contig(vm_page_t m, unsigned long size);
 vm_page_t vm_page_free_fromq_fast(void);
 void vm_page_event_internal(vm_page_t, vm_page_event_t);
 void vm_page_dirty(vm_page_t m);