kernel - Add /dev/upmap and /dev/kpmap and sys/upmap.h
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 16 Oct 2014 19:35:05 +0000 (12:35 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 16 Oct 2014 19:51:14 +0000 (12:51 -0700)
* Add two memory-mappable devices for accessing a per-process and a global
  kernel shared memory space.  These can be mapped to acquire certain
  information from the kernel more efficiently than the equivalent
  system calls.

  Userland programs using this feature should NOT directly map the sys_upmap
  and sys_kpmap structures (which is why they are in #ifdef _KERNEL sections
  in sys/upmap.h).  Instead, mmap the devices using UPMAP_MAPSIZE and
  KPMAP_MAPSIZE and parse the ukpheader[] array at the front of each area
  to locate the desired fields.  You can then simply cache a pointer to
  the desired field.

  The width of each field is encoded in the UPTYPE/KPTYPE elements and
  can be asserted if desired; user programs are not expected to handle
  integers of multiple sizes for the same field type.
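
  As an illustration only (not part of this commit), a minimal userland
  sketch of that lookup follows.  The width check in the assert is
  inferred from the UKPLEN_* definitions in sys/upmap.h, which appear to
  encode a field width of 1 << ((type & UKPLEN_MASK) >> 8) bytes.

	#include <sys/types.h>
	#include <sys/mman.h>
	#include <sys/upmap.h>
	#include <assert.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>

	/* walk the headers at the front of the map until type == 0 */
	static void *
	ukp_find(void *base, uint16_t type)
	{
		ukpheader_t *head;

		for (head = base; head->type != 0; ++head) {
			if (head->type == type)
				return ((char *)base + head->offset);
		}
		return (NULL);
	}

	int
	main(void)
	{
		int fd = open("/dev/kpmap", O_RDONLY);
		void *base;
		uint64_t *tsc_freq;

		assert(fd >= 0);
		base = mmap(NULL, KPMAP_MAPSIZE, PROT_READ, MAP_SHARED,
			    fd, 0);
		assert(base != MAP_FAILED);

		/* assert the encoded width, then cache the pointer */
		assert((KPTYPE_TSC_FREQ & UKPLEN_MASK) == UKPLEN_8);
		tsc_freq = ukp_find(base, KPTYPE_TSC_FREQ);
		if (tsc_freq != NULL)
			printf("tsc_freq: %ju\n", (uintmax_t)*tsc_freq);
		return (0);
	}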

* Add /dev/upmap.  A program can open and mmap() this device R+W and use
  it to access:

  header[...] - See sys/upmap.h.  An array of headers terminating with
  a type=0 header, indicating where the various fields reside in
  the mapping.  Userland should use this array instead
  of mapping struct sys_upmap directly.

  version - The sys_upmap version, typically 1.

  runticks - Scheduler run ticks (aggregate, all threads).  This
  may be used by userland interpreters to determine
  when to soft-switch.

  forkid - A unique non-zero 64-bit fork identifier.  This is NOT a
  pid.  This may be used by userland libraries to determine
  if a fork has occurred by comparing against a stored
  value.

  pid - The current process pid.  This may be used to acquire the
  process pid without having to make further system calls.

  proc_title - This starts out as an empty buffer and may be used to set
  the process title.  To revert to the original process title,
  set proc_title[0] to 0.

  NOTE!  Userland may write to the entire buffer, but it is recommended
 that userland only write to fields intended to be writable.

  NOTE!  When a program forks, an area already mmap()d remains mmap()d but
 will point to the new process's area and not the old, so libraries
 do not need to do anything special at fork.

  NOTE!  Access to this structure is cpu localized.
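
  A hedged sketch of the read-write usage described above (illustration
  only, not part of this commit): map /dev/upmap, locate the pid and
  proc_title fields through the headers, then set and later revert the
  process title.

	#include <sys/types.h>
	#include <sys/mman.h>
	#include <sys/upmap.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		int fd = open("/dev/upmap", O_RDWR);
		char *base, *title = NULL;
		ukpheader_t *head;
		pid_t *pidp = NULL;

		base = mmap(NULL, UPMAP_MAPSIZE, PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, 0);
		if (fd < 0 || base == MAP_FAILED)
			return (1);
		for (head = (ukpheader_t *)base; head->type != 0; ++head) {
			if (head->type == UPTYPE_PID)
				pidp = (pid_t *)(base + head->offset);
			else if (head->type == UPTYPE_PROC_TITLE)
				title = base + head->offset;
		}
		if (pidp != NULL && title != NULL) {
			/* the pid field saves a getpid() system call */
			snprintf(title, UPMAP_MAXPROCTITLE, "demo [%d]",
				 (int)*pidp);
			sleep(10);	/* observe the new title via ps(1) */
			title[0] = 0;	/* revert to the original title */
		}
		return (0);
	}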

* Add /dev/kpmap.  A program can open and mmap() this device RO and use
  it to access:

  header[...] - See sys/upmap.h.  An array of headers terminating with
  a type=0 header, indicating where the various fields reside in
  the mapping.  Userland should use this array instead
  of mapping struct sys_kpmap directly.

  version - The sys_kpmap version, typically 1.

  upticks - System uptime tick counter (a 32-bit integer).  Monotonic,
  uncompensated.

  ts_uptime - System uptime in struct timespec format at tick-resolution.
  Monotonic, uncompensated.

  ts_realtime - System realtime in struct timespec format at tick-resolution.
  This is compensated, so reverse (backward) time steps are possible.

  tsc_freq - If the system supports a TSC of some sort, the TSC
  frequency is recorded here, else 0.

  tick_freq - The tick resolution of ts_uptime and ts_realtime and
  approximate tick resolution for the scheduler.  Typically
  100.

  NOTE!  Userland may only read from this buffer.

  NOTE!  Access to this structure is NOT cpu localized.  A memory fence
 and double-check should be used when accessing non-atomic structures
 which might change, such as ts_uptime and ts_realtime.
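
  The double-check that note calls for might look like the following
  userland sketch.  It assumes upticks advances with each
  ts_uptime/ts_realtime update (the update path is not shown in this
  diff) so it can serve as a sequence check, and it uses C11
  atomic_thread_fence() as the read barrier in place of the kernel's
  cpu_lfence().

	#include <stdatomic.h>
	#include <stdint.h>
	#include <time.h>

	/* both pointers come from the kpmap header lookup */
	struct timespec
	kpmap_get_realtime(volatile const int32_t *upticks,
			   const struct timespec *ts_realtime)
	{
		struct timespec ts;
		int32_t t1, t2;

		do {
			t1 = *upticks;
			atomic_thread_fence(memory_order_acquire);
			ts = *ts_realtime;
			atomic_thread_fence(memory_order_acquire);
			t2 = *upticks;
		} while (t1 != t2);	/* a tick hit mid-copy; retry */
		return (ts);
	}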

 XXX needs work.

30 files changed:
sys/dev/drm/i915/i915_gem.c
sys/emulation/linux/i386/imgact_linux.c
sys/emulation/linux/linux_misc.c
sys/kern/imgact_aout.c
sys/kern/imgact_elf.c
sys/kern/imgact_gzip.c
sys/kern/init_main.c
sys/kern/kern_clock.c
sys/kern/kern_exit.c
sys/kern/kern_fork.c
sys/kern/kern_memio.c
sys/kern/kern_proc.c
sys/kern/kern_slaballoc.c
sys/kern/link_elf_obj.c
sys/kern/sys_pipe.c
sys/kern/sys_process.c
sys/kern/sysv_shm.c
sys/kern/vfs_bio.c
sys/sys/device.h
sys/sys/globaldata.h
sys/sys/proc.h
sys/sys/upmap.h [new file with mode: 0644]
sys/vfs/procfs/procfs_status.c
sys/vm/vm.h
sys/vm/vm_fault.c
sys/vm/vm_kern.c
sys/vm/vm_map.c
sys/vm/vm_map.h
sys/vm/vm_mmap.c
sys/vm/vm_unix.c

index a7364ba..3195a25 100644
@@ -701,13 +701,14 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
        vm_object_hold(obj->vm_obj);
        vm_object_reference_locked(obj->vm_obj);
        vm_object_drop(obj->vm_obj);
-       rv = vm_map_find(map, obj->vm_obj, args->offset, &addr, args->size,
-           PAGE_SIZE, /* align */
-           TRUE, /* fitit */
-           VM_MAPTYPE_NORMAL, /* maptype */
-           VM_PROT_READ | VM_PROT_WRITE, /* prot */
-           VM_PROT_READ | VM_PROT_WRITE, /* max */
-           MAP_SHARED /* cow */);
+       rv = vm_map_find(map, obj->vm_obj, NULL,
+                        args->offset, &addr, args->size,
+                        PAGE_SIZE, /* align */
+                        TRUE, /* fitit */
+                        VM_MAPTYPE_NORMAL, /* maptype */
+                        VM_PROT_READ | VM_PROT_WRITE, /* prot */
+                        VM_PROT_READ | VM_PROT_WRITE, /* max */
+                        MAP_SHARED /* cow */);
        if (rv != KERN_SUCCESS) {
                vm_object_deallocate(obj->vm_obj);
                error = -vm_mmap_to_errno(rv);
index 7369ed6..cd272bc 100644
@@ -127,7 +127,8 @@ exec_linux_imgact(struct image_params *imgp)
         * Map text+data+bss read/write/execute
         */
        vmaddr = virtual_offset;
-       error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr,
+       error = vm_map_find(&vmspace->vm_map, NULL, NULL,
+                           0, &vmaddr,
                            a_out->a_text + a_out->a_data + bss_size,
                            PAGE_SIZE,
                            FALSE, VM_MAPTYPE_NORMAL,
@@ -199,7 +200,8 @@ exec_linux_imgact(struct image_params *imgp)
         */
        if (bss_size != 0) {
            vmaddr = virtual_offset + a_out->a_text + a_out->a_data;
-           error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, 
+           error = vm_map_find(&vmspace->vm_map, NULL, NULL,
+                               0, &vmaddr,
                                bss_size, PAGE_SIZE,
                                FALSE, VM_MAPTYPE_NORMAL,
                                VM_PROT_ALL, VM_PROT_ALL,
index 851bdec..3d15750 100644
@@ -415,12 +415,11 @@ sys_linux_uselib(struct linux_uselib_args *args)
                vmaddr = trunc_page(a_out->a_entry);
 
                /* get anon user mapping, read+write+execute */
-               error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0,
-                                   &vmaddr, a_out->a_text + a_out->a_data,
+               error = vm_map_find(&p->p_vmspace->vm_map, NULL, NULL,
+                                   0, &vmaddr, a_out->a_text + a_out->a_data,
                                    PAGE_SIZE,
                                    FALSE, VM_MAPTYPE_NORMAL,
-                                   VM_PROT_ALL, VM_PROT_ALL,
-                                   0);
+                                   VM_PROT_ALL, VM_PROT_ALL, 0);
                if (error)
                        goto cleanup;
 
@@ -472,12 +471,11 @@ sys_linux_uselib(struct linux_uselib_args *args)
                    a_out->a_data;
 
                /* allocate some 'anon' space */
-               error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0,
-                                   &vmaddr, bss_size,
+               error = vm_map_find(&p->p_vmspace->vm_map, NULL, NULL,
+                                   0, &vmaddr, bss_size,
                                    PAGE_SIZE,
                                    FALSE, VM_MAPTYPE_NORMAL,
-                                   VM_PROT_ALL, VM_PROT_ALL,
-                                   0);
+                                   VM_PROT_ALL, VM_PROT_ALL, 0);
                if (error)
                        goto cleanup;
        }
index 89ca8ff..d27009f 100644
@@ -184,7 +184,7 @@ exec_aout_imgact(struct image_params *imgp)
        vm_object_reference_locked(object);
 
        text_end = virtual_offset + a_out->a_text;
-       error = vm_map_insert(map, &count, object,
+       error = vm_map_insert(map, &count, object, NULL,
                file_offset,
                virtual_offset, text_end,
                VM_MAPTYPE_NORMAL,
@@ -201,7 +201,7 @@ exec_aout_imgact(struct image_params *imgp)
        data_end = text_end + a_out->a_data;
        if (a_out->a_data) {
                vm_object_reference_locked(object);
-               error = vm_map_insert(map, &count, object,
+               error = vm_map_insert(map, &count, object, NULL,
                        file_offset + a_out->a_text,
                        text_end, data_end,
                        VM_MAPTYPE_NORMAL,
@@ -217,8 +217,8 @@ exec_aout_imgact(struct image_params *imgp)
        vm_object_drop(object);
 
        if (bss_size) {
-               error = vm_map_insert(map, &count, NULL, 0,
-                       data_end, data_end + bss_size,
+               error = vm_map_insert(map, &count, NULL, NULL,
+                       0, data_end, data_end + bss_size,
                        VM_MAPTYPE_NORMAL,
                        VM_PROT_ALL, VM_PROT_ALL,
                        0);
index 33b1c0a..b0e0460 100644
@@ -304,7 +304,7 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, struct vnode *vp,
                count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
                vm_map_lock(&vmspace->vm_map);
                rv = vm_map_insert(&vmspace->vm_map, &count,
-                                     object,
+                                     object, NULL,
                                      file_addr,        /* file offset */
                                      map_addr,         /* virtual start */
                                      map_addr + map_len,/* virtual end */
@@ -346,8 +346,10 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, struct vnode *vp,
                count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
                vm_map_lock(&vmspace->vm_map);
                rv = vm_map_insert(&vmspace->vm_map, &count,
-                                       NULL, 0,
-                                       map_addr, map_addr + map_len,
+                                       NULL, NULL,
+                                       0,
+                                       map_addr,
+                                       map_addr + map_len,
                                        VM_MAPTYPE_NORMAL,
                                        VM_PROT_ALL, VM_PROT_ALL,
                                        0);
index 17625ab..5096495 100644
@@ -247,11 +247,10 @@ do_aout_hdr(struct imgact_gzip * gz)
                vmaddr = gz->virtual_offset + gz->a_out.a_text + 
                        gz->a_out.a_data;
                error = vm_map_find(&vmspace->vm_map,
-                                   NULL, 0,
-                                   &vmaddr, gz->bss_size, PAGE_SIZE,
+                                   NULL, NULL,
+                                   0, &vmaddr, gz->bss_size, PAGE_SIZE,
                                    FALSE, VM_MAPTYPE_NORMAL,
-                                   VM_PROT_ALL, VM_PROT_ALL,
-                                   0);
+                                   VM_PROT_ALL, VM_PROT_ALL, 0);
                if (error) {
                        gz->where = __LINE__;
                        return (error);
index b1c20a3..f09bfdf 100644
@@ -90,6 +90,7 @@ struct proc *initproc;
 struct proc proc0;
 struct lwp lwp0;
 struct thread thread0;
+struct sys_kpmap *kpmap;
 
 int cmask = CMASK;
 u_int cpu_mi_feature;
@@ -583,11 +584,11 @@ start_init(void *dummy, struct trapframe *frame)
         * Need just enough stack to hold the faked-up "execve()" arguments.
         */
        addr = trunc_page(USRSTACK - PAGE_SIZE);
-       error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr,
-                           PAGE_SIZE, PAGE_SIZE,
+       error = vm_map_find(&p->p_vmspace->vm_map, NULL, NULL,
+                           0, &addr, PAGE_SIZE,
+                           PAGE_SIZE,
                            FALSE, VM_MAPTYPE_NORMAL,
-                           VM_PROT_ALL, VM_PROT_ALL,
-                           0);
+                           VM_PROT_ALL, VM_PROT_ALL, 0);
        if (error)
                panic("init: couldn't allocate argument space");
        p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
@@ -717,6 +718,28 @@ kick_init(const void *udata __unused)
 }
 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
 
+static void
+kpmap_init(const void *udata __unused)
+{
+       kpmap = kmalloc(roundup2(sizeof(*kpmap), PAGE_SIZE),
+                       M_TEMP, M_ZERO | M_WAITOK);
+
+       kpmap->header[0].type = UKPTYPE_VERSION;
+       kpmap->header[0].offset = offsetof(struct sys_kpmap, version);
+       kpmap->header[1].type = KPTYPE_UPTICKS;
+       kpmap->header[1].offset = offsetof(struct sys_kpmap, upticks);
+       kpmap->header[2].type = KPTYPE_TS_UPTIME;
+       kpmap->header[2].offset = offsetof(struct sys_kpmap, ts_uptime);
+       kpmap->header[3].type = KPTYPE_TS_REALTIME;
+       kpmap->header[3].offset = offsetof(struct sys_kpmap, ts_realtime);
+       kpmap->header[4].type = KPTYPE_TSC_FREQ;
+       kpmap->header[4].offset = offsetof(struct sys_kpmap, tsc_freq);
+       kpmap->header[5].type = KPTYPE_TICK_FREQ;
+       kpmap->header[5].offset = offsetof(struct sys_kpmap, tick_freq);
+       kpmap->version = KPMAP_VERSION;
+}
+SYSINIT(kpmapinit, SI_BOOT1_POST, SI_ORDER_FIRST, kpmap_init, NULL)
+
 /*
  * Machine independant globaldata initialization
  *
index 0ec5edc..81ee172 100644
@@ -84,6 +84,7 @@
 #include <sys/signalvar.h>
 #include <sys/timex.h>
 #include <sys/timepps.h>
+#include <sys/upmap.h>
 #include <vm/vm.h>
 #include <sys/lock.h>
 #include <vm/pmap.h>
@@ -253,6 +254,10 @@ initclocks(void *dummy)
        /*psratio = profhz / stathz;*/
        initclocks_pcpu();
        clocks_running = 1;
+       if (kpmap) {
+           kpmap->tsc_freq = (uint64_t)tsc_frequency;
+           kpmap->tick_freq = hz;
+       }
 }
 
 /*
@@ -551,6 +556,14 @@ hardclock(systimer_t info, int in_ipi __unused, struct intrframe *frame)
             */
            cpu_sfence();
            basetime_index = ni;
+
+           /*
+            * Update kpmap on each tick
+            */
+           if (kpmap) {
+               getnanouptime(&kpmap->ts_uptime);
+               getnanotime(&kpmap->ts_realtime);
+           }
        }
 
        /*
@@ -576,6 +589,9 @@ hardclock(systimer_t info, int in_ipi __unused, struct intrframe *frame)
         */
        if ((p = curproc) != NULL && lwkt_trytoken(&p->p_token)) {
                crit_enter_hard();
+               if (p->p_upmap)
+                       ++p->p_upmap->runticks;
+
                if (frame && CLKF_USERMODE(frame) &&
                    timevalisset(&p->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&p->p_timer[ITIMER_VIRTUAL], ustick) == 0) {
index 85290fb..9a20180 100644
@@ -764,6 +764,7 @@ lwp_wait(struct lwp *lp)
         * will be cleared temporarily if a thread gets preempted.
         */
        while ((td->td_flags & (TDF_RUNNING |
+                               TDF_RUNQ |
                                TDF_PREEMPT_LOCK |
                                TDF_EXITING)) != TDF_EXITING) {
                tsleep(lp, 0, "lwpwait3", 1);
@@ -788,6 +789,7 @@ lwp_dispose(struct lwp *lp)
        KKASSERT(lwkt_preempted_proc() != lp);
        KKASSERT(td->td_refs == 0);
        KKASSERT((td->td_flags & (TDF_RUNNING |
+                                 TDF_RUNQ |
                                  TDF_PREEMPT_LOCK |
                                  TDF_EXITING)) == TDF_EXITING);
 
@@ -984,6 +986,7 @@ loop:
                         * the zombie list.
                         */
                        proc_remove_zombie(p);
+                       proc_userunmap(p);
                        lwkt_reltoken(&p->p_token);
                        leavepgrp(p);
 
index b5fbc4b..9c4d98c 100644
@@ -379,6 +379,8 @@ fork1(struct lwp *lp1, int flags, struct proc **procp)
         * once the process is on the allproc list to avoid things such
         * as competing modifications to p_flags.
         */
+       mycpu->gd_forkid += ncpus;
+       p2->p_forkid = mycpu->gd_forkid + mycpu->gd_cpuid;
        p2->p_lasttid = -1;     /* first tid will be 0 */
        p2->p_stat = SIDL;
 
index 3f3d46c..8f391e7 100644
@@ -71,8 +71,11 @@ static       d_close_t       mmclose;
 static d_read_t        mmread;
 static d_write_t       mmwrite;
 static d_ioctl_t       mmioctl;
+#if 0
 static d_mmap_t        memmmap;
+#endif
 static d_kqfilter_t    mmkqfilter;
+static int memuksmap(cdev_t dev, vm_page_t fake);
 
 #define CDEV_MAJOR 2
 static struct dev_ops mem_ops = {
@@ -83,7 +86,10 @@ static struct dev_ops mem_ops = {
        .d_write =      mmwrite,
        .d_ioctl =      mmioctl,
        .d_kqfilter =   mmkqfilter,
+#if 0
        .d_mmap =       memmmap,
+#endif
+       .d_uksmap =     memuksmap
 };
 
 static int rand_bolt;
@@ -283,6 +289,8 @@ mmrw(cdev_t dev, struct uio *uio, int flags)
                        c = min(c, poolsize);
                        error = uiomove(buf, (int)c, uio);
                        continue;
+               /* case 5: read/write not supported, mmap only */
+               /* case 6: read/write not supported, mmap only */
                case 12:
                        /*
                         * minor device 12 (/dev/zero) is source of nulls 
@@ -326,45 +334,94 @@ mmwrite(struct dev_write_args *ap)
        return(mmrw(ap->a_head.a_dev, ap->a_uio, ap->a_ioflag));
 }
 
-
-
-
-
 /*******************************************************\
 * allow user processes to MMAP some memory sections    *
 * instead of going through read/write                  *
 \*******************************************************/
 
+static int user_kernel_mapping(int num, vm_ooffset_t offset,
+                               vm_ooffset_t *resultp);
+
+#if 0
+
 static int
 memmmap(struct dev_mmap_args *ap)
 {
        cdev_t dev = ap->a_head.a_dev;
+       vm_ooffset_t result;
+       int error;
 
        switch (minor(dev)) {
        case 0:
                /* 
                 * minor device 0 is physical memory 
                 */
-#if defined(__i386__)
-               ap->a_result = i386_btop(ap->a_offset);
-#elif defined(__x86_64__)
-               ap->a_result = x86_64_btop(ap->a_offset);
-#endif
-               return 0;
+               ap->a_result = atop(ap->a_offset);
+               error = 0;
+               break;
        case 1:
                /*
                 * minor device 1 is kernel memory 
                 */
-#if defined(__i386__)
-               ap->a_result = i386_btop(vtophys(ap->a_offset));
-#elif defined(__x86_64__)
-               ap->a_result = x86_64_btop(vtophys(ap->a_offset));
+               ap->a_result = atop(vtophys(ap->a_offset));
+               error = 0;
+               break;
+       case 5:
+       case 6:
+               /*
+                * minor device 5 is /dev/upmap (see sys/upmap.h)
+                * minor device 6 is /dev/kpmap (see sys/upmap.h)
+                */
+               result = 0;
+               error = user_kernel_mapping(minor(dev), ap->a_offset, &result);
+               ap->a_result = atop(result);
+               break;
+       default:
+               error = EINVAL;
+               break;
+       }
+       return error;
+}
+
 #endif
-               return 0;
 
+static int
+memuksmap(cdev_t dev, vm_page_t fake)
+{
+       vm_ooffset_t result;
+       int error;
+
+       switch (minor(dev)) {
+       case 0:
+               /*
+                * minor device 0 is physical memory
+                */
+               fake->phys_addr = ptoa(fake->pindex);
+               error = 0;
+               break;
+       case 1:
+               /*
+                * minor device 1 is kernel memory
+                */
+               fake->phys_addr = vtophys(ptoa(fake->pindex));
+               error = 0;
+               break;
+       case 5:
+       case 6:
+               /*
+                * minor device 5 is /dev/upmap (see sys/upmap.h)
+                * minor device 6 is /dev/kpmap (see sys/upmap.h)
+                */
+               result = 0;
+               error = user_kernel_mapping(minor(dev),
+                                           ptoa(fake->pindex), &result);
+               fake->phys_addr = result;
+               break;
        default:
-               return EINVAL;
+               error = EINVAL;
+               break;
        }
+       return error;
 }
 
 static int
@@ -601,6 +658,47 @@ iszerodev(cdev_t dev)
        return (zerodev == dev);
 }
 
+/*
+ * /dev/upmap and /dev/kpmap.
+ */
+static int
+user_kernel_mapping(int num, vm_ooffset_t offset, vm_ooffset_t *resultp)
+{
+       struct proc *p = curproc;
+       int error;
+
+       if (p == NULL)
+               return (EINVAL);
+       error = EINVAL;
+
+       switch(num) {
+       case 5:
+               /*
+                * /dev/upmap - maps RW per-process shared user-kernel area.
+                */
+               if (p->p_upmap == NULL)
+                       proc_usermap(p);
+               if (p->p_upmap && offset == 0) {
+                       /* only good for current process */
+                       *resultp = pmap_kextract((vm_offset_t)p->p_upmap);
+                       error = 0;
+               }
+               break;
+       case 6:
+               /*
+                * /dev/kpmap - maps RO shared kernel global page
+                */
+               if (kpmap && offset == 0) {
+                       *resultp = pmap_kextract((vm_offset_t)kpmap);
+                       error = 0;
+               }
+               break;
+       default:
+               break;
+       }
+       return error;
+}
+
 static void
 mem_drvinit(void *unused)
 {
@@ -614,6 +712,8 @@ mem_drvinit(void *unused)
        make_dev(&mem_ops, 2, UID_ROOT, GID_WHEEL, 0666, "null");
        make_dev(&mem_ops, 3, UID_ROOT, GID_WHEEL, 0644, "random");
        make_dev(&mem_ops, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
+       make_dev(&mem_ops, 5, UID_ROOT, GID_WHEEL, 0666, "upmap");
+       make_dev(&mem_ops, 6, UID_ROOT, GID_WHEEL, 0444, "kpmap");
        zerodev = make_dev(&mem_ops, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
        make_dev(&mem_ops, 14, UID_ROOT, GID_WHEEL, 0600, "io");
 }
index ea9457c..950d2fa 100644
@@ -1154,6 +1154,49 @@ lwpkthreaddeferred(void)
        }
 }
 
+void
+proc_usermap(struct proc *p)
+{
+       struct sys_upmap *upmap;
+
+       lwkt_gettoken(&p->p_token);
+       upmap = kmalloc(roundup2(sizeof(*upmap), PAGE_SIZE), M_PROC,
+                       M_WAITOK | M_ZERO);
+       if (p->p_upmap == NULL) {
+               upmap->header[0].type = UKPTYPE_VERSION;
+               upmap->header[0].offset = offsetof(struct sys_upmap, version);
+               upmap->header[1].type = UPTYPE_RUNTICKS;
+               upmap->header[1].offset = offsetof(struct sys_upmap, runticks);
+               upmap->header[2].type = UPTYPE_FORKID;
+               upmap->header[2].offset = offsetof(struct sys_upmap, forkid);
+               upmap->header[3].type = UPTYPE_PID;
+               upmap->header[3].offset = offsetof(struct sys_upmap, pid);
+               upmap->header[4].type = UPTYPE_PROC_TITLE;
+               upmap->header[4].offset = offsetof(struct sys_upmap,proc_title);
+
+               upmap->version = UPMAP_VERSION;
+               upmap->pid = p->p_pid;
+               upmap->forkid = p->p_forkid;
+               p->p_upmap = upmap;
+       } else {
+               kfree(upmap, M_PROC);
+       }
+       lwkt_reltoken(&p->p_token);
+}
+
+void
+proc_userunmap(struct proc *p)
+{
+       struct sys_upmap *upmap;
+
+       lwkt_gettoken(&p->p_token);
+       if ((upmap = p->p_upmap) != NULL) {
+               p->p_upmap = NULL;
+               kfree(upmap, M_PROC);
+       }
+       lwkt_reltoken(&p->p_token);
+}
+
 /*
  * Scan all processes on the allproc list.  The process is automatically
  * held for the callback.  A return value of -1 terminates the loop.
@@ -1600,11 +1643,33 @@ sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
                error = EPERM;
                goto done;
        }
-       if (req->oldptr && (pa = p->p_args) != NULL) {
-               refcount_acquire(&pa->ar_ref);
-               error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
-               if (refcount_release(&pa->ar_ref))
-                       kfree(pa, M_PARGS);
+       if (req->oldptr) {
+               if (p->p_upmap != NULL && p->p_upmap->proc_title[0]) {
+                       /*
+                        * Args set via writable user process mmap.
+                        * We must calculate the string length manually
+                        * because the user data can change at any time.
+                        */
+                       size_t n;
+                       char *base;
+
+                       base = p->p_upmap->proc_title;
+                       for (n = 0; n < UPMAP_MAXPROCTITLE - 1; ++n) {
+                               if (base[n] == 0)
+                                       break;
+                       }
+                       error = SYSCTL_OUT(req, base, n);
+                       if (error == 0)
+                               error = SYSCTL_OUT(req, "", 1);
+               } else if ((pa = p->p_args) != NULL) {
+                       /*
+                        * Args set by setproctitle() sysctl.
+                        */
+                       refcount_acquire(&pa->ar_ref);
+                       error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
+                       if (refcount_release(&pa->ar_ref))
+                               kfree(pa, M_PARGS);
+               }
        }
        if (req->newptr == NULL)
                goto done;
index f40db03..28e014c 100644
@@ -1482,11 +1482,12 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
      */
     vm_object_hold(&kernel_object);
     vm_object_reference_locked(&kernel_object);
-    vm_map_insert(&kernel_map, &count, 
-                   &kernel_object, addr, addr, addr + size,
-                   VM_MAPTYPE_NORMAL,
-                   VM_PROT_ALL, VM_PROT_ALL,
-                   0);
+    vm_map_insert(&kernel_map, &count,
+                 &kernel_object, NULL,
+                 addr, addr, addr + size,
+                 VM_MAPTYPE_NORMAL,
+                 VM_PROT_ALL, VM_PROT_ALL,
+                 0);
     vm_object_drop(&kernel_object);
     vm_map_set_wired_quick(&kernel_map, addr, size, &count);
     vm_map_unlock(&kernel_map);
index e9bdcd1..f37ce42 100644
@@ -656,8 +656,9 @@ link_elf_obj_load_file(const char *filename, linker_file_t * result)
        vm_object_drop(ef->object);
 #else
        mapbase = KERNBASE;
-       error = vm_map_find(&kernel_map, ef->object, 0, &mapbase,
-                           round_page(mapsize), PAGE_SIZE,
+       error = vm_map_find(&kernel_map, ef->object, NULL,
+                           0, &mapbase, round_page(mapsize),
+                           PAGE_SIZE,
                            TRUE, VM_MAPTYPE_NORMAL,
                            VM_PROT_ALL, VM_PROT_ALL, FALSE);
        vm_object_drop(ef->object);
index 3380451..2ea4fa6 100644
@@ -321,12 +321,11 @@ pipespace(struct pipe *cpipe, int size)
                object = vm_object_allocate(OBJT_DEFAULT, npages);
                buffer = (caddr_t)vm_map_min(&kernel_map);
 
-               error = vm_map_find(&kernel_map, object, 0,
-                                   (vm_offset_t *)&buffer,
-                                   size, PAGE_SIZE,
+               error = vm_map_find(&kernel_map, object, NULL,
+                                   0, (vm_offset_t *)&buffer, size,
+                                   PAGE_SIZE,
                                    1, VM_MAPTYPE_NORMAL,
-                                   VM_PROT_ALL, VM_PROT_ALL,
-                                   0);
+                                   VM_PROT_ALL, VM_PROT_ALL, 0);
 
                if (error != KERN_SUCCESS) {
                        vm_object_deallocate(object);
index 098499d..26d04cd 100644
@@ -85,12 +85,11 @@ pread (struct proc *procp, unsigned int addr, unsigned int *retval) {
        vm_map_lookup_done (tmap, out_entry, 0);
 
        /* Find space in kernel_map for the page we're interested in */
-       rv = vm_map_find (&kernel_map, object, IDX_TO_OFF(pindex),
-                         &kva,
-                         PAGE_SIZE, PAGE_SIZE,
+       rv = vm_map_find (&kernel_map, object, NULL,
+                         IDX_TO_OFF(pindex), &kva, PAGE_SIZE,
+                         PAGE_SIZE,
                          0, VM_MAPTYPE_NORMAL,
-                         VM_PROT_ALL, VM_PROT_ALL,
-                         0);
+                         VM_PROT_ALL, VM_PROT_ALL, 0);
 
        if (!rv) {
                vm_object_reference XXX (object);
@@ -172,12 +171,11 @@ pwrite (struct proc *procp, unsigned int addr, unsigned int datum) {
                return EFAULT;
 
        /* Find space in kernel_map for the page we're interested in */
-       rv = vm_map_find (&kernel_map, object, IDX_TO_OFF(pindex),
-                         &kva,
-                         PAGE_SIZE, PAGE_SIZE,
+       rv = vm_map_find (&kernel_map, object, NULL,
+                         IDX_TO_OFF(pindex), &kva, PAGE_SIZE,
+                         PAGE_SIZE,
                          0, VM_MAPTYPE_NORMAL,
-                         VM_PROT_ALL, VM_PROT_ALL,
-                         0);
+                         VM_PROT_ALL, VM_PROT_ALL, 0);
        if (!rv) {
                vm_object_reference XXX (object);
 
index 8edd773..dc971ad 100644
@@ -329,13 +329,12 @@ again:
        vm_object_chain_wait(shm_handle->shm_object, 0);
        vm_object_reference_locked(shm_handle->shm_object);
        rv = vm_map_find(&p->p_vmspace->vm_map, 
-                        shm_handle->shm_object, 0,
-                        &attach_va,
-                        size, align,
+                        shm_handle->shm_object, NULL,
+                        0, &attach_va, size,
+                        align,
                         ((flags & MAP_FIXED) ? 0 : 1), 
                         VM_MAPTYPE_NORMAL,
-                        prot, prot,
-                        0);
+                        prot, prot, 0);
        vm_object_drop(shm_handle->shm_object);
        if (rv != KERN_SUCCESS) {
                 vm_object_deallocate(shm_handle->shm_object);
index 45ef3db..5588f2d 100644
@@ -2334,8 +2334,8 @@ restart:
                        }
                        if (addr) {
                                vm_map_insert(&buffer_map, &count,
-                                       NULL, 0,
-                                       addr, addr + maxsize,
+                                       NULL, NULL,
+                                       0, addr, addr + maxsize,
                                        VM_MAPTYPE_NORMAL,
                                        VM_PROT_ALL, VM_PROT_ALL,
                                        MAP_NOFAULT);
index 0219f2c..b9aca6c 100644
@@ -48,6 +48,7 @@
 struct cdev;
 struct ucred;
 struct devfs_bitmap;
+struct vm_page;
 
 /*
  * This structure is at the base of every device args structure
@@ -247,7 +248,8 @@ struct dev_ops {
        d_kqfilter_t    *d_kqfilter;
        d_clone_t       *d_clone;       /* clone from base dev_ops */
        d_revoke_t      *d_revoke;
-#define dev_ops_last_field     d_revoke
+       int (*d_uksmap)(struct cdev *dev, struct vm_page *fake);
+#define dev_ops_last_field     d_uksmap
 };
 
 /*
index 0042d38..cbe6be3 100644
@@ -172,7 +172,10 @@ struct globaldata {
        struct lwkt_tokref gd_handoff;          /* hand-off tokref */
        void            *gd_delayed_wakeup[2];
        void            *gd_sample_pc;          /* sample program ctr/tr */
-       void            *gd_preserved[5];       /* future fields */
+       void            *gd_reserved_pcpu_mmap; /* future */
+       uint64_t        gd_forkid;              /* per-cpu unique inc ncpus */
+       uint64_t        gd_reserved64[4];
+       void            *gd_preserved[4];       /* future fields */
        /* extended by <machine/globaldata.h> */
 };
 
index cb2dbbd..d569ca1 100644
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- *     @(#)proc.h      8.15 (Berkeley) 5/19/95
- * $FreeBSD: src/sys/sys/proc.h,v 1.99.2.9 2003/06/06 20:21:32 tegge Exp $
  */
-
 #ifndef _SYS_PROC_H_
 #define        _SYS_PROC_H_
 
@@ -55,6 +51,7 @@
 #include <sys/rtprio.h>                        /* For struct rtprio. */
 #include <sys/signal.h>
 #include <sys/lock.h>
+#include <sys/upmap.h>
 #ifndef _KERNEL
 #include <sys/time.h>                  /* For structs itimerval, timeval. */
 #endif
@@ -344,6 +341,9 @@ struct      proc {
        void            *p_vmm;
        cpulock_t       p_vmm_cpulock;  /* count cpus in and kickout lock */
        cpumask_t       p_vmm_cpumask;  /* cpus entering or in vmm */
+       struct sys_upmap *p_upmap;      /* user RO mappable per-process page */
+       forkid_t        p_forkid;       /* unique forkid */
+       void            *p_reserveds[4]; /* reserved for future */
 };
 
 #define lwp_wchan      lwp_thread->td_wchan
@@ -575,6 +575,8 @@ void        prelezomb (struct proc *);
 void   pstall (struct proc *, const char *, int);
 void   lwpuserret(struct lwp *);
 void   lwpkthreaddeferred(void);
+void   proc_usermap(struct proc *p);
+void   proc_userunmap(struct proc *p);
 
 u_int32_t      procrunnable (void);
 
diff --git a/sys/sys/upmap.h b/sys/sys/upmap.h
new file mode 100644
index 0000000..3af5c5c
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2014 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef        _SYS_UPMAP_H_
+#define        _SYS_UPMAP_H_
+
+#ifndef _SYS_TYPES_H_
+#include <sys/types.h>
+#endif
+#ifndef _SYS_TIME_H_
+#include <sys/time.h>
+#endif
+
+#define UPMAP_MAXPROCTITLE     1024
+#define UPMAP_MAPSIZE          65536
+#define KPMAP_MAPSIZE          65536
+
+#define UPMAP_VERSION          1
+#define KPMAP_VERSION          1
+
+typedef uint64_t       forkid_t;
+
+typedef struct ukpheader {
+       uint16_t        type;           /* element type */
+       uint16_t        offset;         /* offset from map base, max 65535 */
+} ukpheader_t;
+
+#define UKPLEN_MASK            0x0F00
+#define UKPLEN_1               0x0000
+#define UKPLEN_2               0x0100
+#define UKPLEN_4               0x0200
+#define UKPLEN_8               0x0300
+#define UKPLEN_16              0x0400
+#define UKPLEN_32              0x0500
+#define UKPLEN_64              0x0600
+#define UKPLEN_128             0x0700
+#define UKPLEN_256             0x0800
+#define UKPLEN_512             0x0900
+#define UKPLEN_1024            0x0A00
+
+#define UKPLEN_TS              ((sizeof(struct timespec) == 8) ? \
+                                       UKPLEN_8 : UKPLEN_16)
+
+#define UKPTYPE_VERSION                (0x0001 | UKPLEN_4)     /* always first */
+
+#define UPTYPE_RUNTICKS                (0x0010 | UKPLEN_4)
+#define UPTYPE_FORKID          (0x0011 | UKPLEN_8)
+#define UPTYPE_PID             (0x0012 | UKPLEN_4)
+#define UPTYPE_PROC_TITLE      (0x0013 | UKPLEN_1024)
+
+#define KPTYPE_UPTICKS         (0x8000 | UKPLEN_4)
+#define KPTYPE_TS_UPTIME       (0x8001 | UKPLEN_TS)
+#define KPTYPE_TS_REALTIME     (0x8002 | UKPLEN_TS)
+#define KPTYPE_TSC_FREQ                (0x8003 | UKPLEN_8)
+#define KPTYPE_TICK_FREQ       (0x8004 | UKPLEN_8)
+
+#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
+
+/*
+ * (writable) user per-process map via /dev/upmap.
+ *
+ * ABSOLUTE LOCATIONS CAN CHANGE, ITERATE HEADERS FOR THE TYPE YOU DESIRE
+ * UNTIL YOU HIT TYPE 0, THEN CACHE THE RESULTING POINTER.
+ *
+ * If you insist, at least check that the version matches UPMAP_VERSION.
+ */
+struct sys_upmap {
+       ukpheader_t     header[64];
+       uint32_t        version;
+       uint32_t        runticks;       /* running scheduler ticks */
+       forkid_t        forkid;         /* unique 2^64 (fork detect) NOT MONO */
+       uint32_t        unused01;       /* cpu migrations (kpmap detect) */
+       pid_t           pid;            /* process id */
+       uint32_t        reserved[16];
+       char            proc_title[UPMAP_MAXPROCTITLE];
+};
+
+/*
+ * (read-only) kernel per-cpu map via /dev/kpmap.
+ *
+ * ABSOLUTE LOCATIONS CAN CHANGE, ITERATE HEADERS FOR THE TYPE YOU DESIRE
+ * UNTIL YOU HIT TYPE 0, THEN CACHE THE RESULTING POINTER.
+ *
+ * If you insist, at least check that the version matches KPMAP_VERSION.
+ */
+struct sys_kpmap {
+       ukpheader_t     header[64];
+       int32_t         version;
+       int32_t         upticks;
+       struct timespec ts_uptime;      /* mono uptime @ticks (uncompensated) */
+       struct timespec ts_realtime;    /* realtime @ticks resolution */
+       int64_t         tsc_freq;       /* (if supported by cpu) */
+       int32_t         tick_freq;      /* scheduler tick frequency */
+};
+
+#endif
+
+#ifdef _KERNEL
+extern struct sys_kpmap *kpmap;
+#endif
+
+#endif
index b479a87..8b9ec6e 100644
@@ -201,12 +201,27 @@ procfs_docmdline(struct proc *curp, struct lwp *lp, struct pfsnode *pfs,
         * don't fall back on p->p_comm or return an error: the authentic
         * Linux behaviour is to return zero-length in this case.
         */
-
-       if (p->p_args &&
+       if (p->p_upmap != NULL && p->p_upmap->proc_title[0] &&
            (ps_argsopen || (CHECKIO(curp, p) &&
-            (p->p_flags & P_INEXEC) == 0 &&
-            !p_trespass(curp->p_ucred, p->p_ucred)))
-        ) {
+                            (p->p_flags & P_INEXEC) == 0 &&
+                            !p_trespass(curp->p_ucred, p->p_ucred))
+           )) {
+               /*
+                * Args set via writable user process mmap.
+                * We must calculate the string length manually
+                * because the user data can change at any time.
+                */
+               bp = p->p_upmap->proc_title;
+               for (buflen = 0; buflen < UPMAP_MAXPROCTITLE - 1; ++buflen) {
+                       if (bp[buflen] == 0)
+                               break;
+               }
+               buf = NULL;
+       } else if (p->p_args &&
+                  (ps_argsopen || (CHECKIO(curp, p) &&
+                                   (p->p_flags & P_INEXEC) == 0 &&
+                                    !p_trespass(curp->p_ucred, p->p_ucred))
+                  )) {
                bp = p->p_args->ar_args;
                buflen = p->p_args->ar_length;
                buf = NULL;
index f04b3e8..a619775 100644
@@ -92,10 +92,17 @@ typedef u_char vm_prot_t;   /* protection codes */
 
 typedef u_char vm_maptype_t;   /* type of vm_map_entry */
 
+/*
+ * NOTE: UKSMAPs are unmanaged.  The underlying kernel memory must not be
+ *      freed until all related mappings are gone.  There is no object.
+ *      The device can map different things for the same UKS mapping even
+ *      when inherited via fork().
+ */
 #define VM_MAPTYPE_UNSPECIFIED 0
 #define VM_MAPTYPE_NORMAL      1
 #define VM_MAPTYPE_VPAGETABLE  2
 #define VM_MAPTYPE_SUBMAP      3
+#define VM_MAPTYPE_UKSMAP      4       /* user-kernel shared memory */
 
 union vm_map_object;
 typedef union vm_map_object vm_map_object_t;
index 064afcd..79b06a3 100644
@@ -1,5 +1,37 @@
 /*
- * (MPSAFE)
+ * Copyright (c) 2003-2014 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * ---
  *
  * Copyright (c) 1991, 1993
  *     The Regents of the University of California.  All rights reserved.
@@ -36,8 +68,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *     from: @(#)vm_fault.c    8.4 (Berkeley) 1/12/94
- *
+ * ---
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
@@ -63,9 +94,6 @@
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
- *
- * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $
- * $DragonFly: src/sys/vm/vm_fault.c,v 1.47 2008/07/01 02:02:56 dillon Exp $
  */
 
 /*
@@ -379,6 +407,7 @@ RetryFault:
        fs.lookup_still_valid = TRUE;
        fs.first_m = NULL;
        fs.object = fs.first_object;    /* so unlock_and_deallocate works */
+       fs.prot = fs.first_prot;        /* default (used by uksmap) */
 
        if (fs.entry->eflags & (MAP_ENTRY_NOFAULT | MAP_ENTRY_KSTACK)) {
                if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
@@ -393,6 +422,30 @@ RetryFault:
                }
        }
 
+       /*
+        * A user-kernel shared map has no VM object and bypasses
+        * everything.  We execute the uksmap function with a temporary
+        * fictitious vm_page.  The address is directly mapped with no
+        * management.
+        */
+       if (fs.entry->maptype == VM_MAPTYPE_UKSMAP) {
+               struct vm_page fakem;
+
+               bzero(&fakem, sizeof(fakem));
+               fakem.pindex = first_pindex;
+               fakem.flags = PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED;
+               fakem.valid = VM_PAGE_BITS_ALL;
+               fakem.pat_mode = VM_MEMATTR_DEFAULT;
+               if (fs.entry->object.uksmap(fs.entry->aux.dev, &fakem)) {
+                       result = KERN_FAILURE;
+                       unlock_things(&fs);
+                       goto done2;
+               }
+               pmap_enter(fs.map->pmap, vaddr, &fakem, fs.prot | inherit_prot,
+                          fs.wired, fs.entry);
+               goto done_success;
+       }
+
        /*
         * A system map entry may return a NULL object.  No object means
         * no pager means an unrecoverable kernel fault.
@@ -528,9 +581,6 @@ RetryFault:
        vm_page_flag_set(fs.m, PG_REFERENCED);
        pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot | inherit_prot,
                   fs.wired, fs.entry);
-       mycpu->gd_cnt.v_vm_faults++;
-       if (curthread->td_lwp)
-               ++curthread->td_lwp->lwp_ru.ru_minflt;
 
        /*KKASSERT(fs.m->queue == PQ_NONE); page-in op may deactivate page */
        KKASSERT(fs.m->flags & PG_BUSY);
@@ -574,6 +624,11 @@ RetryFault:
                }
        }
 
+done_success:
+       mycpu->gd_cnt.v_vm_faults++;
+       if (curthread->td_lwp)
+               ++curthread->td_lwp->lwp_ru.ru_minflt;
+
        /*
         * Unlock everything, and return
         */
@@ -1984,9 +2039,21 @@ vm_fault_wire(vm_map_t map, vm_map_entry_t entry,
        pmap = vm_map_pmap(map);
        start = entry->start;
        end = entry->end;
-       fictitious = entry->object.vm_object &&
-                       ((entry->object.vm_object->type == OBJT_DEVICE) ||
-                        (entry->object.vm_object->type == OBJT_MGTDEVICE));
+       switch(entry->maptype) {
+       case VM_MAPTYPE_NORMAL:
+       case VM_MAPTYPE_VPAGETABLE:
+               fictitious = entry->object.vm_object &&
+                           ((entry->object.vm_object->type == OBJT_DEVICE) ||
+                            (entry->object.vm_object->type == OBJT_MGTDEVICE));
+               break;
+       case VM_MAPTYPE_UKSMAP:
+               fictitious = TRUE;
+               break;
+       default:
+               fictitious = FALSE;
+               break;
+       }
+
        if (entry->eflags & MAP_ENTRY_KSTACK)
                start += PAGE_SIZE;
        map->timestamp++;
@@ -2390,7 +2457,7 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot,
         * We do not currently prefault mappings that use virtual page
         * tables.  We do not prefault foreign pmaps.
         */
-       if (entry->maptype == VM_MAPTYPE_VPAGETABLE)
+       if (entry->maptype != VM_MAPTYPE_NORMAL)
                return;
        lp = curthread->td_lwp;
        if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace)))
@@ -2691,7 +2758,7 @@ vm_prefault_quick(pmap_t pmap, vm_offset_t addra,
         * We do not currently prefault mappings that use virtual page
         * tables.  We do not prefault foreign pmaps.
         */
-       if (entry->maptype == VM_MAPTYPE_VPAGETABLE)
+       if (entry->maptype != VM_MAPTYPE_NORMAL)
                return;
        lp = curthread->td_lwp;
        if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace)))
index 92d4d0a..94a8dbb 100644
@@ -102,11 +102,11 @@ kmem_alloc_pageable(vm_map_t map, vm_size_t size)
 
        size = round_page(size);
        addr = vm_map_min(map);
-       result = vm_map_find(map, NULL, (vm_offset_t) 0,
-                            &addr, size, PAGE_SIZE,
+       result = vm_map_find(map, NULL, NULL,
+                            (vm_offset_t) 0, &addr, size,
+                            PAGE_SIZE,
                             TRUE, VM_MAPTYPE_NORMAL,
-                            VM_PROT_ALL, VM_PROT_ALL,
-                            0);
+                            VM_PROT_ALL, VM_PROT_ALL, 0);
        if (result != KERN_SUCCESS)
                return (0);
        return (addr);
@@ -125,11 +125,11 @@ kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align)
 
        size = round_page(size);
        addr = vm_map_min(map);
-       result = vm_map_find(map, NULL, (vm_offset_t) 0,
-                            &addr, size, align,
+       result = vm_map_find(map, NULL, NULL,
+                            (vm_offset_t) 0, &addr, size,
+                            align,
                             TRUE, VM_MAPTYPE_NORMAL,
-                            VM_PROT_ALL, VM_PROT_ALL,
-                            MAP_NOFAULT);
+                            VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        if (result != KERN_SUCCESS)
                return (0);
        return (addr);
@@ -184,10 +184,10 @@ kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags)
        vm_object_hold(&kernel_object);
        vm_object_reference_locked(&kernel_object);
        vm_map_insert(map, &count,
-                     &kernel_object, addr, addr, addr + size,
+                     &kernel_object, NULL,
+                     addr, addr, addr + size,
                      VM_MAPTYPE_NORMAL,
-                     VM_PROT_ALL, VM_PROT_ALL,
-                     cow);
+                     VM_PROT_ALL, VM_PROT_ALL, cow);
        vm_object_drop(&kernel_object);
 
        vm_map_unlock(map);
@@ -273,11 +273,11 @@ kmem_suballoc(vm_map_t parent, vm_map_t result,
        size = round_page(size);
 
        *min = (vm_offset_t) vm_map_min(parent);
-       ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
-                         min, size, PAGE_SIZE,
+       ret = vm_map_find(parent, NULL, NULL,
+                         (vm_offset_t) 0, min, size,
+                         PAGE_SIZE,
                          TRUE, VM_MAPTYPE_UNSPECIFIED,
-                         VM_PROT_ALL, VM_PROT_ALL,
-                         0);
+                         VM_PROT_ALL, VM_PROT_ALL, 0);
        if (ret != KERN_SUCCESS) {
                kprintf("kmem_suballoc: bad status return of %d.\n", ret);
                panic("kmem_suballoc");
@@ -325,8 +325,8 @@ kmem_alloc_wait(vm_map_t map, vm_size_t size)
                tsleep(map, 0, "kmaw", 0);
        }
        vm_map_insert(map, &count,
-                     NULL, (vm_offset_t) 0,
-                     addr, addr + size,
+                     NULL, NULL,
+                     (vm_offset_t) 0, addr, addr + size,
                      VM_MAPTYPE_NORMAL,
                      VM_PROT_ALL, VM_PROT_ALL,
                      0);
@@ -356,7 +356,7 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
        vm_map_lock(map);
        if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
-                               flags, &addr)) {
+                            flags, &addr)) {
                vm_map_unlock(map);
                vm_map_entry_release(count);
                return (0);
@@ -364,8 +364,11 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
        offset = addr - vm_map_min(&kernel_map);
        vm_object_hold(&kernel_object);
        vm_object_reference_locked(&kernel_object);
-       vm_map_insert(map, &count, &kernel_object, offset, addr, addr + size,
-               VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, 0);
+       vm_map_insert(map, &count,
+                     &kernel_object, NULL,
+                     offset, addr, addr + size,
+                     VM_MAPTYPE_NORMAL,
+                     VM_PROT_ALL, VM_PROT_ALL, 0);
        vm_map_unlock(map);
        vm_map_entry_release(count);
        vm_object_drop(&kernel_object);
@@ -431,28 +434,28 @@ kmem_init(void)
        addr = KvaStart;
        if (virtual2_start) {
                if (addr < virtual2_start) {
-                       vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
-                                     addr, virtual2_start,
+                       vm_map_insert(m, &count,
+                                     NULL, NULL,
+                                     (vm_offset_t) 0, addr, virtual2_start,
                                      VM_MAPTYPE_NORMAL,
-                                     VM_PROT_ALL, VM_PROT_ALL,
-                                     0);
+                                     VM_PROT_ALL, VM_PROT_ALL, 0);
                }
                addr = virtual2_end;
        }
        if (addr < virtual_start) {
-               vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
-                             addr, virtual_start,
+               vm_map_insert(m, &count,
+                             NULL, NULL,
+                             (vm_offset_t) 0, addr, virtual_start,
                              VM_MAPTYPE_NORMAL,
-                             VM_PROT_ALL, VM_PROT_ALL,
-                             0);
+                             VM_PROT_ALL, VM_PROT_ALL, 0);
        }
        addr = virtual_end;
        if (addr < KvaEnd) {
-               vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
-                             addr, KvaEnd,
+               vm_map_insert(m, &count,
+                             NULL, NULL,
+                             (vm_offset_t) 0, addr, KvaEnd,
                              VM_MAPTYPE_NORMAL,
-                             VM_PROT_ALL, VM_PROT_ALL,
-                             0);
+                             VM_PROT_ALL, VM_PROT_ALL, 0);
        }
        /* ... and ending with the completion of the above `insert' */
        vm_map_unlock(m);
index 851a7b9..e4f6eef 100644
@@ -965,18 +965,22 @@ vm_map_lookup_entry(vm_map_t map, vm_offset_t address, vm_map_entry_t *entry)
  * making call to account for the new entry.
  */
 int
-vm_map_insert(vm_map_t map, int *countp,
-             vm_object_t object, vm_ooffset_t offset,
-             vm_offset_t start, vm_offset_t end,
+vm_map_insert(vm_map_t map, int *countp, void *map_object, void *map_aux,
+             vm_ooffset_t offset, vm_offset_t start, vm_offset_t end,
              vm_maptype_t maptype,
-             vm_prot_t prot, vm_prot_t max,
-             int cow)
+             vm_prot_t prot, vm_prot_t max, int cow)
 {
        vm_map_entry_t new_entry;
        vm_map_entry_t prev_entry;
        vm_map_entry_t temp_entry;
        vm_eflags_t protoeflags;
        int must_drop = 0;
+       vm_object_t object;
+
+       if (maptype == VM_MAPTYPE_UKSMAP)
+               object = NULL;
+       else
+               object = map_object;
 
        ASSERT_VM_MAP_LOCKED(map);
        if (object)
@@ -1048,6 +1052,7 @@ vm_map_insert(vm_map_t map, int *countp,
                 (prev_entry->end == start) &&
                 (prev_entry->wired_count == 0) &&
                 prev_entry->maptype == maptype &&
+                maptype == VM_MAPTYPE_NORMAL &&
                 ((prev_entry->object.vm_object == NULL) ||
                  vm_object_coalesce(prev_entry->object.vm_object,
                                     OFF_TO_IDX(prev_entry->offset),
@@ -1101,9 +1106,10 @@ vm_map_insert(vm_map_t map, int *countp,
 
        new_entry->maptype = maptype;
        new_entry->eflags = protoeflags;
-       new_entry->object.vm_object = object;
+       new_entry->object.map_object = map_object;
+       new_entry->aux.master_pde = 0;          /* in case size is different */
+       new_entry->aux.map_aux = map_aux;
        new_entry->offset = offset;
-       new_entry->aux.master_pde = 0;
 
        new_entry->inheritance = VM_INHERIT_DEFAULT;
        new_entry->protection = prot;
@@ -1145,7 +1151,8 @@ vm_map_insert(vm_map_t map, int *countp,
         * don't try.
         */
        if ((cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) &&
-           maptype != VM_MAPTYPE_VPAGETABLE) {
+           maptype != VM_MAPTYPE_VPAGETABLE &&
+           maptype != VM_MAPTYPE_UKSMAP) {
                int dorelock = 0;
                if (vm_map_relock_enable && (cow & MAP_PREFAULT_RELOCK)) {
                        dorelock = 1;
@@ -1306,17 +1313,24 @@ vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length,
  * No requirements.  This function will lock the map temporarily.
  */
 int
-vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
-           vm_offset_t *addr,  vm_size_t length, vm_size_t align,
+vm_map_find(vm_map_t map, void *map_object, void *map_aux,
+           vm_ooffset_t offset, vm_offset_t *addr,
+           vm_size_t length, vm_size_t align,
            boolean_t fitit,
            vm_maptype_t maptype,
            vm_prot_t prot, vm_prot_t max,
            int cow)
 {
        vm_offset_t start;
+       vm_object_t object;
        int result;
        int count;
 
+       if (maptype == VM_MAPTYPE_UKSMAP)
+               object = NULL;
+       else
+               object = map_object;
+
        start = *addr;
 
        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
@@ -1333,11 +1347,9 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
                }
                start = *addr;
        }
-       result = vm_map_insert(map, &count, object, offset,
-                              start, start + length,
-                              maptype,
-                              prot, max,
-                              cow);
+       result = vm_map_insert(map, &count, map_object, map_aux,
+                              offset, start, start + length,
+                              maptype, prot, max, cow);
        if (object)
                vm_object_drop(object);
        vm_map_unlock(map);
@@ -1370,6 +1382,8 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp)
 
        if (entry->maptype == VM_MAPTYPE_SUBMAP)
                return;
+       if (entry->maptype == VM_MAPTYPE_UKSMAP)
+               return;
 
        prev = entry->prev;
        if (prev != &map->header) {
@@ -2205,7 +2219,8 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
                         * management structures and the faulting in of the
                         * page.
                         */
-                       if (entry->maptype != VM_MAPTYPE_SUBMAP) {
+                       if (entry->maptype == VM_MAPTYPE_NORMAL ||
+                           entry->maptype == VM_MAPTYPE_VPAGETABLE) {
                                int copyflag = entry->eflags &
                                               MAP_ENTRY_NEEDS_COPY;
                                if (copyflag && ((entry->protection &
@@ -2402,7 +2417,8 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
                         * do not have to do this for entries that point to sub
                         * maps because we won't hold the lock on the sub map.
                         */
-                       if (entry->maptype != VM_MAPTYPE_SUBMAP) {
+                       if (entry->maptype == VM_MAPTYPE_NORMAL ||
+                           entry->maptype == VM_MAPTYPE_VPAGETABLE) {
                                int copyflag = entry->eflags &
                                               MAP_ENTRY_NEEDS_COPY;
                                if (copyflag && ((entry->protection &
@@ -2612,7 +2628,10 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
        for (current = entry; current->start < end; current = current->next) {
                offset = current->offset + (start - current->start);
                size = (end <= current->end ? end : current->end) - start;
-               if (current->maptype == VM_MAPTYPE_SUBMAP) {
+
+               switch(current->maptype) {
+               case VM_MAPTYPE_SUBMAP:
+               {
                        vm_map_t smap;
                        vm_map_entry_t tentry;
                        vm_size_t tsize;
@@ -2626,8 +2645,15 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
                        object = tentry->object.vm_object;
                        offset = tentry->offset + (offset - tentry->start);
                        vm_map_unlock_read(smap);
-               } else {
+                       break;
+               }
+               case VM_MAPTYPE_NORMAL:
+               case VM_MAPTYPE_VPAGETABLE:
                        object = current->object.vm_object;
+                       break;
+               default:
+                       object = NULL;
+                       break;
                }
 
                if (object)
@@ -2759,8 +2785,12 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp)
        switch(entry->maptype) {
        case VM_MAPTYPE_NORMAL:
        case VM_MAPTYPE_VPAGETABLE:
+       case VM_MAPTYPE_SUBMAP:
                vm_object_deallocate(entry->object.vm_object);
                break;
+       case VM_MAPTYPE_UKSMAP:
+               /* XXX TODO */
+               break;
        default:
                break;
        }
@@ -2847,7 +2877,17 @@ again:
 
                offidxstart = OFF_TO_IDX(entry->offset);
                count = OFF_TO_IDX(e - s);
-               object = entry->object.vm_object;
+
+               switch(entry->maptype) {
+               case VM_MAPTYPE_NORMAL:
+               case VM_MAPTYPE_VPAGETABLE:
+               case VM_MAPTYPE_SUBMAP:
+                       object = entry->object.vm_object;
+                       break;
+               default:
+                       object = NULL;
+                       break;
+               }
 
                /*
                 * Unwire before removing addresses from the pmap; otherwise,
@@ -3260,9 +3300,11 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
 {
        vm_object_t src_object;
 
-       if (dst_entry->maptype == VM_MAPTYPE_SUBMAP)
+       if (dst_entry->maptype == VM_MAPTYPE_SUBMAP ||
+           dst_entry->maptype == VM_MAPTYPE_UKSMAP)
                return;
-       if (src_entry->maptype == VM_MAPTYPE_SUBMAP)
+       if (src_entry->maptype == VM_MAPTYPE_SUBMAP ||
+           src_entry->maptype == VM_MAPTYPE_UKSMAP)
                return;
 
        if (src_entry->wired_count == 0) {
@@ -3330,6 +3372,11 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
  * The source map must not be locked.
  * No requirements.
  */
+static void vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map,
+                         vm_map_entry_t old_entry, int *countp);
+static void vmspace_fork_uksmap_entry(vm_map_t old_map, vm_map_t new_map,
+                         vm_map_entry_t old_entry, int *countp);
+
 struct vmspace *
 vmspace_fork(struct vmspace *vm1)
 {
@@ -3337,8 +3384,6 @@ vmspace_fork(struct vmspace *vm1)
        vm_map_t old_map = &vm1->vm_map;
        vm_map_t new_map;
        vm_map_entry_t old_entry;
-       vm_map_entry_t new_entry;
-       vm_object_t object;
        int count;
 
        lwkt_gettoken(&vm1->vm_map.token);
@@ -3364,98 +3409,18 @@ vmspace_fork(struct vmspace *vm1)
 
        old_entry = old_map->header.next;
        while (old_entry != &old_map->header) {
-               if (old_entry->maptype == VM_MAPTYPE_SUBMAP)
+               switch(old_entry->maptype) {
+               case VM_MAPTYPE_SUBMAP:
                        panic("vm_map_fork: encountered a submap");
-
-               switch (old_entry->inheritance) {
-               case VM_INHERIT_NONE:
                        break;
-               case VM_INHERIT_SHARE:
-                       /*
-                        * Clone the entry, creating the shared object if
-                        * necessary.
-                        */
-                       if (old_entry->object.vm_object == NULL)
-                               vm_map_entry_allocate_object(old_entry);
-
-                       if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
-                               /*
-                                * Shadow a map_entry which needs a copy,
-                                * replacing its object with a new object
-                                * that points to the old one.  Ask the
-                                * shadow code to automatically add an
-                                * additional ref.  We can't do it afterwords
-                                * because we might race a collapse.  The call
-                                * to vm_map_entry_shadow() will also clear
-                                * OBJ_ONEMAPPING.
-                                */
-                               vm_map_entry_shadow(old_entry, 1);
-                       } else if (old_entry->object.vm_object) {
-                               /*
-                                * We will make a shared copy of the object,
-                                * and must clear OBJ_ONEMAPPING.
-                                *
-                                * Optimize vnode objects.  OBJ_ONEMAPPING
-                                * is non-applicable but clear it anyway,
-                                * and its terminal so we don'th ave to deal
-                                * with chains.  Reduces SMP conflicts.
-                                *
-                                * XXX assert that object.vm_object != NULL
-                                *     since we allocate it above.
-                                */
-                               object = old_entry->object.vm_object;
-                               if (object->type == OBJT_VNODE) {
-                                       vm_object_reference_quick(object);
-                                       vm_object_clear_flag(object,
-                                                            OBJ_ONEMAPPING);
-                               } else {
-                                       vm_object_hold(object);
-                                       vm_object_chain_wait(object, 0);
-                                       vm_object_reference_locked(object);
-                                       vm_object_clear_flag(object,
-                                                            OBJ_ONEMAPPING);
-                                       vm_object_drop(object);
-                               }
-                       }
-
-                       /*
-                        * Clone the entry.  We've already bumped the ref on
-                        * any vm_object.
-                        */
-                       new_entry = vm_map_entry_create(new_map, &count);
-                       *new_entry = *old_entry;
-                       new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
-                       new_entry->wired_count = 0;
-
-                       /*
-                        * Insert the entry into the new map -- we know we're
-                        * inserting at the end of the new map.
-                        */
-
-                       vm_map_entry_link(new_map, new_map->header.prev,
-                                         new_entry);
-
-                       /*
-                        * Update the physical map
-                        */
-                       pmap_copy(new_map->pmap, old_map->pmap,
-                                 new_entry->start,
-                                 (old_entry->end - old_entry->start),
-                                 old_entry->start);
+               case VM_MAPTYPE_UKSMAP:
+                       vmspace_fork_uksmap_entry(old_map, new_map,
+                                                 old_entry, &count);
                        break;
-               case VM_INHERIT_COPY:
-                       /*
-                        * Clone the entry and link into the map.
-                        */
-                       new_entry = vm_map_entry_create(new_map, &count);
-                       *new_entry = *old_entry;
-                       new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
-                       new_entry->wired_count = 0;
-                       new_entry->object.vm_object = NULL;
-                       vm_map_entry_link(new_map, new_map->header.prev,
-                                         new_entry);
-                       vm_map_copy_entry(old_map, new_map, old_entry,
-                                         new_entry);
+               case VM_MAPTYPE_NORMAL:
+               case VM_MAPTYPE_VPAGETABLE:
+                       vmspace_fork_normal_entry(old_map, new_map,
+                                                 old_entry, &count);
                        break;
                }
                old_entry = old_entry->next;
@@ -3472,6 +3437,126 @@ vmspace_fork(struct vmspace *vm1)
        return (vm2);
 }
 
+static
+void
+vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map,
+                         vm_map_entry_t old_entry, int *countp)
+{
+       vm_map_entry_t new_entry;
+       vm_object_t object;
+
+       switch (old_entry->inheritance) {
+       case VM_INHERIT_NONE:
+               break;
+       case VM_INHERIT_SHARE:
+               /*
+                * Clone the entry, creating the shared object if
+                * necessary.
+                */
+               if (old_entry->object.vm_object == NULL)
+                       vm_map_entry_allocate_object(old_entry);
+
+               if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
+                       /*
+                        * Shadow a map_entry which needs a copy,
+                        * replacing its object with a new object
+                        * that points to the old one.  Ask the
+                        * shadow code to automatically add an
+                        * additional ref.  We can't do it afterwards
+                        * because we might race a collapse.  The call
+                        * to vm_map_entry_shadow() will also clear
+                        * OBJ_ONEMAPPING.
+                        */
+                       vm_map_entry_shadow(old_entry, 1);
+               } else if (old_entry->object.vm_object) {
+                       /*
+                        * We will make a shared copy of the object,
+                        * and must clear OBJ_ONEMAPPING.
+                        *
+                        * Optimize vnode objects.  OBJ_ONEMAPPING
+                        * is non-applicable but clear it anyway,
+                        * and it's terminal so we don't have to deal
+                        * with chains.  Reduces SMP conflicts.
+                        *
+                        * XXX assert that object.vm_object != NULL
+                        *     since we allocate it above.
+                        */
+                       object = old_entry->object.vm_object;
+                       if (object->type == OBJT_VNODE) {
+                               vm_object_reference_quick(object);
+                               vm_object_clear_flag(object,
+                                                    OBJ_ONEMAPPING);
+                       } else {
+                               vm_object_hold(object);
+                               vm_object_chain_wait(object, 0);
+                               vm_object_reference_locked(object);
+                               vm_object_clear_flag(object,
+                                                    OBJ_ONEMAPPING);
+                               vm_object_drop(object);
+                       }
+               }
+
+               /*
+                * Clone the entry.  We've already bumped the ref on
+                * any vm_object.
+                */
+               new_entry = vm_map_entry_create(new_map, countp);
+               *new_entry = *old_entry;
+               new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+               new_entry->wired_count = 0;
+
+               /*
+                * Insert the entry into the new map -- we know we're
+                * inserting at the end of the new map.
+                */
+
+               vm_map_entry_link(new_map, new_map->header.prev,
+                                 new_entry);
+
+               /*
+                * Update the physical map
+                */
+               pmap_copy(new_map->pmap, old_map->pmap,
+                         new_entry->start,
+                         (old_entry->end - old_entry->start),
+                         old_entry->start);
+               break;
+       case VM_INHERIT_COPY:
+               /*
+                * Clone the entry and link into the map.
+                */
+               new_entry = vm_map_entry_create(new_map, countp);
+               *new_entry = *old_entry;
+               new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+               new_entry->wired_count = 0;
+               new_entry->object.vm_object = NULL;
+               vm_map_entry_link(new_map, new_map->header.prev,
+                                 new_entry);
+               vm_map_copy_entry(old_map, new_map, old_entry,
+                                 new_entry);
+               break;
+       }
+}
+
+/*
+ * When forking user-kernel shared maps, the map might change in the
+ * child so do not try to copy the underlying pmap entries.
+ */
+static
+void
+vmspace_fork_uksmap_entry(vm_map_t old_map, vm_map_t new_map,
+                         vm_map_entry_t old_entry, int *countp)
+{
+       vm_map_entry_t new_entry;
+
+       new_entry = vm_map_entry_create(new_map, countp);
+       *new_entry = *old_entry;
+       new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+       new_entry->wired_count = 0;
+       vm_map_entry_link(new_map, new_map->header.prev,
+                         new_entry);
+}
+
 /*
  * Create an auto-grow stack entry
  *
@@ -3555,12 +3640,11 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
         * eliminate these as input parameters, and just
         * pass these values here in the insert call.
         */
-       rv = vm_map_insert(map, &count,
-                          NULL, 0, addrbos + max_ssize - init_ssize,
+       rv = vm_map_insert(map, &count, NULL, NULL,
+                          0, addrbos + max_ssize - init_ssize,
                           addrbos + max_ssize,
                           VM_MAPTYPE_NORMAL,
-                          prot, max,
-                          cow);
+                          prot, max, cow);
 
        /* Now set the avail_ssize amount */
        if (rv == KERN_SUCCESS) {
@@ -3710,11 +3794,10 @@ Retry:
                addr = end;
        }
 
-       rv = vm_map_insert(map, &count,
-                          NULL, 0, addr, stack_entry->start,
+       rv = vm_map_insert(map, &count, NULL, NULL,
+                          0, addr, stack_entry->start,
                           VM_MAPTYPE_NORMAL,
-                          VM_PROT_ALL, VM_PROT_ALL,
-                          0);
+                          VM_PROT_ALL, VM_PROT_ALL, 0);
 
        /* Adjust the available stack space by the amount we grew. */
        if (rv == KERN_SUCCESS) {
@@ -3992,6 +4075,15 @@ RetryLookup:
                        fault_type |= VM_PROT_WRITE;
        }
 
+       /*
+        * Only NORMAL and VPAGETABLE maps are object-based.  UKSMAPs are not.
+        */
+       if (entry->maptype != VM_MAPTYPE_NORMAL &&
+           entry->maptype != VM_MAPTYPE_VPAGETABLE) {
+               *object = NULL;
+               goto skip;
+       }
+
        /*
         * If the entry was copy-on-write, we either ...
         */
@@ -4047,9 +4139,10 @@ RetryLookup:
         * Return the object/offset from this entry.  If the entry was
         * copy-on-write or empty, it has been fixed up.
         */
+       *object = entry->object.vm_object;
 
+skip:
        *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
-       *object = entry->object.vm_object;
 
        /*
         * Return whether this is the only map sharing this data.  On
@@ -4129,7 +4222,8 @@ DB_SHOW_COMMAND(map, vm_map_print)
                        if (entry->wired_count != 0)
                                db_printf(", wired");
                }
-               if (entry->maptype == VM_MAPTYPE_SUBMAP) {
+               switch(entry->maptype) {
+               case VM_MAPTYPE_SUBMAP:
                        /* XXX no %qd in kernel.  Truncate entry->offset. */
                        db_printf(", share=%p, offset=0x%lx\n",
                            (void *)entry->object.sub_map,
@@ -4144,7 +4238,9 @@ DB_SHOW_COMMAND(map, vm_map_print)
                                             full, 0, NULL);
                                db_indent -= 2;
                        }
-               } else {
+                       break;
+               case VM_MAPTYPE_NORMAL:
+               case VM_MAPTYPE_VPAGETABLE:
                        /* XXX no %qd in kernel.  Truncate entry->offset. */
                        db_printf(", object=%p, offset=0x%lx",
                            (void *)entry->object.vm_object,
@@ -4165,6 +4261,19 @@ DB_SHOW_COMMAND(map, vm_map_print)
                                nlines += 4;
                                db_indent -= 2;
                        }
+                       break;
+               case VM_MAPTYPE_UKSMAP:
+                       db_printf(", uksmap=%p, offset=0x%lx",
+                           (void *)entry->object.uksmap,
+                           (long)entry->offset);
+                       if (entry->eflags & MAP_ENTRY_COW)
+                               db_printf(", copy (%s)",
+                                   (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
+                       db_printf("\n");
+                       nlines++;
+                       break;
+               default:
+                       break;
                }
        }
        db_indent -= 2;
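
For reference, the reworked insertion API threads a generic
(map_object, map_aux) pair through vm_map_find()/vm_map_insert()
instead of a bare vm_object_t.  A minimal sketch of the two calling
conventions under the new signature ('obj', 'dev', 'addr', and 'size'
are hypothetical, not from this commit):

    static int
    example_find(vm_map_t map, vm_object_t obj, cdev_t dev,
                 vm_offset_t *addr, vm_size_t size)
    {
            int rv;

            /* Object-backed mapping: map_aux is unused, pass NULL. */
            rv = vm_map_find(map, obj, NULL,
                             0, addr, size,
                             PAGE_SIZE,
                             TRUE, VM_MAPTYPE_NORMAL,
                             VM_PROT_ALL, VM_PROT_ALL, 0);
            if (rv != KERN_SUCCESS)
                    return (rv);

            /*
             * UKSMAP mapping: no VM object.  (map_object, map_aux)
             * carry the device's d_uksmap callback and its cdev;
             * vm_map_insert() forces its internal object pointer to
             * NULL for this maptype.
             */
            rv = vm_map_find(map, dev->si_ops->d_uksmap, dev,
                             0, addr, size,
                             PAGE_SIZE,
                             TRUE, VM_MAPTYPE_UKSMAP,
                             VM_PROT_READ, VM_PROT_READ, 0);
            return (rv);
    }
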
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index b85c3ae..3dae298 100644
@@ -118,18 +118,21 @@ typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;
 
 /*
- *     Objects which live in maps may be either VM objects, or
- *     another map (called a "sharing map") which denotes read-write
- *     sharing with other maps.
+ * A vm_map_entry may reference a VM object, a submap of another map,
+ * or a uksmap callback (a direct user-kernel shared mapping).
  */
 union vm_map_object {
        struct vm_object *vm_object;    /* object object */
        struct vm_map *sub_map;         /* belongs to another map */
+       int     (*uksmap)(cdev_t dev, vm_page_t fake);
+       void    *map_object;            /* generic */
 };
 
 union vm_map_aux {
        vm_offset_t avail_ssize;        /* amt can grow if this is a stack */
        vpte_t master_pde;              /* virtual page table root */
+       struct cdev *dev;
+       void    *map_aux;
 };
 
 /*
@@ -534,21 +537,20 @@ void vm_map_entry_release(int);
 void vm_map_entry_krelease(int);
 vm_map_t vm_map_create (vm_map_t, struct pmap *, vm_offset_t, vm_offset_t);
 int vm_map_delete (vm_map_t, vm_offset_t, vm_offset_t, int *);
-int vm_map_find (vm_map_t, vm_object_t, vm_ooffset_t,
-                vm_offset_t *, vm_size_t, vm_size_t,
+int vm_map_find (vm_map_t, void *, void *,
+                vm_ooffset_t, vm_offset_t *, vm_size_t,
+                vm_size_t,
                 boolean_t, vm_maptype_t,
-                vm_prot_t, vm_prot_t, 
-                int);
+                vm_prot_t, vm_prot_t, int);
 int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_size_t,
                      int, vm_offset_t *);
 vm_offset_t vm_map_hint(struct proc *, vm_offset_t, vm_prot_t);
 int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t);
 void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t, pmap_t);
-int vm_map_insert (vm_map_t, int *, vm_object_t, vm_ooffset_t,
-                  vm_offset_t, vm_offset_t,
+int vm_map_insert (vm_map_t, int *, void *, void *,
+                  vm_ooffset_t, vm_offset_t, vm_offset_t,
                   vm_maptype_t,
-                  vm_prot_t, vm_prot_t,
-                  int);
+                  vm_prot_t, vm_prot_t, int);
 int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *,
     vm_pindex_t *, vm_prot_t *, boolean_t *);
 void vm_map_lookup_done (vm_map_t, vm_map_entry_t, int);
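
The new uksmap union member lets a character device back a mapping
without any VM object.  A minimal sketch of a d_uksmap handler for a
hypothetical driver exporting a single permanently-wired kernel page;
it assumes (not shown in this hunk) that the fault path hands the
callback a fictitious vm_page with pindex set and expects phys_addr
to be filled in:

    /* 'my_shared_page' is hypothetical wired kernel memory. */
    static char my_shared_page[PAGE_SIZE] __aligned(PAGE_SIZE);

    static int
    mydev_uksmap(cdev_t dev, vm_page_t fake)
    {
            /* Only one page is exported; reject anything else. */
            if (fake->pindex != 0)
                    return (EINVAL);
            fake->phys_addr = vtophys((vm_offset_t)my_shared_page);
            return (0);
    }
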
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index ff56bf1..29d0ed7 100644
@@ -1188,6 +1188,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
        vm_offset_t eaddr;
        vm_size_t   esize;
        vm_size_t   align;
+       int (*uksmap)(cdev_t dev, vm_page_t fake);
        struct vnode *vp;
        struct thread *td = curthread;
        struct proc *p;
@@ -1276,6 +1277,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                        vm_map_remove(map, *addr, *addr + size);
        }
 
+       uksmap = NULL;
+
        /*
         * Lookup/allocate object.
         */
@@ -1306,7 +1309,32 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                vp = NULL;
        } else {
                vp = (struct vnode *)handle;
+
+               /*
+                * Non-anonymous mappings of VCHR (i.e. not /dev/zero)
+                * cannot specify MAP_STACK or MAP_VPAGETABLE.
+                */
                if (vp->v_type == VCHR) {
+                       if (flags & (MAP_STACK | MAP_VPAGETABLE)) {
+                               lwkt_reltoken(&map->token);
+                               return(EINVAL);
+                       }
+               }
+
+               if (vp->v_type == VCHR && vp->v_rdev->si_ops->d_uksmap) {
+                       /*
+                        * Device mappings without a VM object, typically
+                        * sharing permanently allocated kernel memory or
+                        * process-context-specific (per-process) data.
+                        *
+                        * Force them to be shared.
+                        */
+                       uksmap = vp->v_rdev->si_ops->d_uksmap;
+                       object = NULL;
+                       docow = MAP_PREFAULT_PARTIAL;
+                       flags &= ~(MAP_PRIVATE|MAP_COPY);
+                       flags |= MAP_SHARED;
+               } else if (vp->v_type == VCHR) {
                        /*
                         * Device mappings (device size unknown?).
                         * Force them to be shared.
@@ -1332,7 +1360,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                } else {
                        /*
                         * Regular file mapping (typically).  The attribute
-                        * check is for the link count test only.  Mmapble
+                        * check is for the link count test only.  mmapable
                         * vnodes must already have a VM object assigned.
                         */
                        struct vattr vat;
@@ -1383,6 +1411,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
        /*
         * This may place the area in its own page directory if (size) is
         * large enough, otherwise it typically returns its argument.
+        *
+        * (object can be NULL)
         */
        if (fitit) {
                *addr = pmap_addr_hint(object, *addr, size);
@@ -1394,15 +1424,25 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
         * Mappings that use virtual page tables will default to storing
         * the page table at offset 0.
         */
-       if (flags & MAP_STACK) {
+       if (uksmap) {
+               rv = vm_map_find(map, uksmap, vp->v_rdev,
+                                foff, addr, size,
+                                align,
+                                fitit, VM_MAPTYPE_UKSMAP,
+                                prot, maxprot, docow);
+       } else if (flags & MAP_STACK) {
                rv = vm_map_stack(map, *addr, size, flags,
                                  prot, maxprot, docow);
        } else if (flags & MAP_VPAGETABLE) {
-               rv = vm_map_find(map, object, foff, addr, size, align,
+               rv = vm_map_find(map, object, NULL,
+                                foff, addr, size,
+                                align,
                                 fitit, VM_MAPTYPE_VPAGETABLE,
                                 prot, maxprot, docow);
        } else {
-               rv = vm_map_find(map, object, foff, addr, size, align,
+               rv = vm_map_find(map, object, NULL,
+                                foff, addr, size,
+                                align,
                                 fitit, VM_MAPTYPE_NORMAL,
                                 prot, maxprot, docow);
        }
@@ -1412,6 +1452,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                 * Lose the object reference. Will destroy the
                 * object if it's an unnamed anonymous mapping
                 * or named anonymous without other references.
+                *
+                * (NOTE: object can be NULL)
                 */
                vm_object_deallocate(object);
                goto out;
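
From userland, a uksmap-backed device mmap()s like any other
character device; as the hunk above shows, the kernel converts
MAP_PRIVATE/MAP_COPY requests to MAP_SHARED for these mappings, so it
is simplest to ask for MAP_SHARED up front.  A minimal sketch ('len'
and the device path are illustrative; real callers should use the
device's documented mapping size):

    #include <sys/mman.h>
    #include <fcntl.h>
    #include <stddef.h>
    #include <unistd.h>

    static void *
    map_uksmap_device(const char *path, size_t len)
    {
            void *base;
            int fd;

            if ((fd = open(path, O_RDONLY)) < 0)
                    return (NULL);
            base = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
            close(fd);              /* the mapping outlives the fd */
            return (base == MAP_FAILED ? NULL : base);
    }
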
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index df7e934..a24e707 100644
@@ -112,11 +112,11 @@ sys_obreak(struct obreak_args *uap)
                        error = ENOMEM;
                        goto done;
                }
-               rv = vm_map_find(&vm->vm_map, NULL, 0, &old,
-                                diff, PAGE_SIZE,
+               rv = vm_map_find(&vm->vm_map, NULL, NULL,
+                                0, &old, diff,
+                                PAGE_SIZE,
                                 FALSE, VM_MAPTYPE_NORMAL,
-                                VM_PROT_ALL, VM_PROT_ALL,
-                                0);
+                                VM_PROT_ALL, VM_PROT_ALL, 0);
                if (rv != KERN_SUCCESS) {
                        error = ENOMEM;
                        goto done;