From c1692ddf9735ab0d85ef3a98b211dbf18f142f81 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 30 Dec 2009 11:38:39 -0800 Subject: [PATCH] kernel - pmap (i386) - Refactor the foreign pmap mapping * Remove the global alternative pmap and related hacks. Leave the PTD entry reserved because I really do not want to change the location of MPPTDI (The PDE via which the globaldata array is mapped). * Reserve SMP_MAXCPU (16) PTDs in the kernel page table for per-cpu page table maps. This eats about 64MB of KVM (not 64M of physical memory). * Adjust get_ptbase() in platform/pc32/i386/pmap.c to use the per-cpu map. This greatly simplifies the tracking required to determine when a cpu_invltlb() call is required. get_ptbase() is now MPSAFE but the rest of the pmap system is not, yet. This is a big step though. * Note that because these are PTD mappings and not PTE mappings, each process pmap gets its own set, and because processes may be threaded the new get_ptbase() still WILL NOT safely survive a thread switch. On the other hand, for vkernels, this inherent (process pmap x NCPU) pagetable mapping cache should work quite efficiently. The vkernel threads running on different physical cpus will get their own page table map cache ptd. 
--- sys/platform/pc32/i386/genassym.c | 1 + sys/platform/pc32/i386/mp_machdep.c | 2 + sys/platform/pc32/i386/pmap.c | 73 +++++++++++++++----------- sys/platform/pc32/include/globaldata.h | 4 +- sys/platform/pc32/include/pmap.h | 64 +++++++++++++++------- 5 files changed, 94 insertions(+), 50 deletions(-) diff --git a/sys/platform/pc32/i386/genassym.c b/sys/platform/pc32/i386/genassym.c index 0980eae34c..adeef1ebd2 100644 --- a/sys/platform/pc32/i386/genassym.c +++ b/sys/platform/pc32/i386/genassym.c @@ -118,6 +118,7 @@ ASSYM(NPTEPG, NPTEPG); ASSYM(NPDEPG, NPDEPG); ASSYM(PDESIZE, PDESIZE); ASSYM(PTESIZE, PTESIZE); +ASSYM(SMP_MAXCPU, SMP_MAXCPU); ASSYM(PAGE_SHIFT, PAGE_SHIFT); ASSYM(PAGE_MASK, PAGE_MASK); ASSYM(PDRSHIFT, PDRSHIFT); diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index b853258ca0..ff84e23857 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -2241,10 +2241,12 @@ start_all_aps(u_int boot_addr) gd->gd_CMAP2 = &SMPpt[pg + 1]; gd->gd_CMAP3 = &SMPpt[pg + 2]; gd->gd_PMAP1 = &SMPpt[pg + 3]; + gd->gd_GDMAP1 = &PTD[KGDTDI+i]; gd->gd_CADDR1 = ps->CPAGE1; gd->gd_CADDR2 = ps->CPAGE2; gd->gd_CADDR3 = ps->CPAGE3; gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; + gd->gd_GDADDR1= (unsigned *)VADDR(KGDTDI+i, 0); gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 81cbeaffd3..71d60b7fd1 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -161,7 +161,6 @@ vm_offset_t KvaSize; /* max size of kernel virtual address space */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ -static cpumask_t APTmask; static vm_object_t kptobj; @@ -268,12 +267,15 @@ pmap_pte(pmap_t pmap, vm_offset_t va) /* * pmap_pte_quick: * - * Super fast pmap_pte routine best used when scanning the pv lists. - * This eliminates many course-grained invltlb calls. Note that many of - * the pv list scans are across different pmaps and it is very wasteful - * to do an entire invltlb when checking a single mapping. + * Super fast pmap_pte routine best used when scanning the pv lists. + * This eliminates many course-grained invltlb calls. Note that many of + * the pv list scans are across different pmaps and it is very wasteful + * to do an entire invltlb when checking a single mapping. * - * Should only be called while in a critical section. + * Should only be called while in a critical section. + * + * Unlike get_ptbase(), this function MAY be called from an interrupt or + * interrupt thread. */ static unsigned * pmap_pte_quick(pmap_t pmap, vm_offset_t va) @@ -461,10 +463,12 @@ pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) gd->gd_CMAP2 = &SMPpt[pg + 1]; gd->gd_CMAP3 = &SMPpt[pg + 2]; gd->gd_PMAP1 = &SMPpt[pg + 3]; + gd->gd_GDMAP1 = &PTD[KGDTDI]; gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1; gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2; gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1; + gd->gd_GDADDR1= (unsigned *)VADDR(KGDTDI, 0); cpu_invltlb(); } @@ -588,7 +592,7 @@ ptbase_assert(struct pmap *pmap) if (pmap == &kernel_pmap || frame == (((unsigned)PTDpde) & PG_FRAME)) { return; } - KKASSERT(frame == (((unsigned)APTDpde) & PG_FRAME)); + KKASSERT(frame == (*mycpu->gd_GDMAP1 & PG_FRAME)); } #else @@ -632,11 +636,26 @@ pmap_track_modified(vm_offset_t va) return 0; } +/* + * Retrieve the mapped page table base for a particular pmap. Use our self + * mapping for the kernel_pmap or our current pmap. 
+ * + * For foreign pmaps we use the per-cpu page table map. Since this involves + * installing a ptd it's actually (per-process x per-cpu). However, we + * still cannot depend on our mapping to survive thread switches because + * the process might be threaded and switching to another thread for the + * same process on the same cpu will allow that other thread to make its + * own mapping. + * + * This could be a bit confusing but the gist is for something like the + * vkernel which uses foreign pmaps all the time this represents a pretty + * good cache that avoids unnecessary invltlb()s. + */ static unsigned * get_ptbase(pmap_t pmap) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - struct globaldata *gd __debugvar = mycpu; + struct mdglobaldata *gd = mdcpu; /* * We can use PTmap if the pmap is our current address space or @@ -647,24 +666,18 @@ get_ptbase(pmap_t pmap) } /* - * Otherwise we use the alternative address space, APTmap. This - * map is stored in the user portion of the current pmap. However, - * the pmap may still be shared across cpus. Since we are only - * doing a local invltlb we have to keep track of which cpus have - * synced up. + * Otherwise we use the per-cpu alternative page table map. Each + * cpu gets its own map. Because of this we cannot use this map + * from interrupts or threads which can preempt. 
*/ - KKASSERT(gd->gd_intr_nesting_level == 0 && - (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0); + KKASSERT(gd->mi.gd_intr_nesting_level == 0 && + (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0); - if (frame != (((unsigned) APTDpde) & PG_FRAME)) { - APTDpde = (pd_entry_t)(frame | PG_RW | PG_V); - APTmask = gd->gd_cpumask; - cpu_invltlb(); - } else if ((APTmask & gd->gd_cpumask) == 0) { - APTmask |= gd->gd_cpumask; + if ((*gd->gd_GDMAP1 & PG_FRAME) != frame) { + *gd->gd_GDMAP1 = frame | PG_RW | PG_V; cpu_invltlb(); } - return (unsigned *) APTmap; + return ((unsigned *)gd->gd_GDADDR1); } /* @@ -1231,11 +1244,13 @@ pmap_release_free_page(struct pmap *pmap, vm_page_t p) * We leave the page directory page cached, wired, and mapped in * the pmap until the dtor function (pmap_puninit()) gets called. * However, still clean it up so we can set PG_ZERO. + * + * The pmap has already been removed from the pmap_list in the + * PTDPTDI case. */ if (p->pindex == PTDPTDI) { bzero(pde + KPTDI, nkpt * PTESIZE); - pde[MPPTDI] = 0; - pde[APTDPTDI] = 0; + bzero(pde + KGDTDI, (NPDEPG - KGDTDI) * PTESIZE); vm_page_flag_set(p, PG_ZERO); vm_page_wakeup(p); } else { @@ -2623,12 +2638,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, } dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; - if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { - APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); - APTmask = gd->gd_cpumask; - cpu_invltlb(); - } else if ((APTmask & gd->gd_cpumask) == 0) { - APTmask |= gd->gd_cpumask; + if ((*gd->gd_GDMAP1 & PG_FRAME) != dst_frame) { + *gd->gd_GDMAP1 = dst_frame | PG_RW | PG_V; cpu_invltlb(); } pmap_inval_init(&info); @@ -2704,7 +2715,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, dstmpte = pmap_allocpte(dst_pmap, addr); if (src_frame != (((unsigned) PTDpde) & PG_FRAME) || - dst_frame != (((unsigned) APTDpde) & PG_FRAME) + XXX dst_frame != (((unsigned) xxx) & PG_FRAME) ) { kprintf("WARNING: 
pmap_copy: detected and corrected race\n"); pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); diff --git a/sys/platform/pc32/include/globaldata.h b/sys/platform/pc32/include/globaldata.h index 6fc2220e68..3fcf66772d 100644 --- a/sys/platform/pc32/include/globaldata.h +++ b/sys/platform/pc32/include/globaldata.h @@ -88,10 +88,12 @@ struct mdglobaldata { pt_entry_t *gd_CMAP2; pt_entry_t *gd_CMAP3; pt_entry_t *gd_PMAP1; + pt_entry_t *gd_GDMAP1; /* per-cpu whole page table map */ caddr_t gd_CADDR1; caddr_t gd_CADDR2; caddr_t gd_CADDR3; unsigned *gd_PADDR1; + unsigned *gd_GDADDR1; /* per-cpu whole page table map va */ u_int gd_acpi_id; u_int gd_apic_id; }; @@ -112,7 +114,7 @@ struct mdglobaldata { * for SMPpt[] setup in i386/i386/mp_machdep.c and locore.s. * * WARNING! sizeof(privatespace[SMP_MAXCPU]) must fit in the KVA - * reserved for the SMPpt page table (typically one page table page). + * reserved for the SMPpt page table (typically one page table page = 4MB). * * WARNING! This structure must be a multiple of PAGE_SIZE. */ diff --git a/sys/platform/pc32/include/pmap.h b/sys/platform/pc32/include/pmap.h index 0b4d04b205..d5d45a05b2 100644 --- a/sys/platform/pc32/include/pmap.h +++ b/sys/platform/pc32/include/pmap.h @@ -61,44 +61,72 @@ #endif /* - * Pte related macros + * PTE related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< KVA_PAGES - 2 -#error "Maximum NKPDE is KVA_PAGES - 2" +#if NKPDE > KVA_PAGES - NKGDPDE - 2 +#error "Maximum NKPDE is KVA_PAGES - NKGDPDE - 2" #endif /* * The *PTDI values control the layout of virtual memory * - * XXX This works for now, but I am not real happy with it, I'll fix it - * right after I fix locore.s and the magic 28K hole - * - * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff - * * NPEDEPG - number of pde's in the page directory (1024) - * NKPDE - typically (KVA_PAGES - 2) where KVA_PAGES is typically 256 + * NKPDE - max general kernel page table pages not including + * special PTDs. 
Typically KVA_PAGES minus the number + * of special PTDs. + * + * +---------------+ End of kernel memory + * | APTDPTDI | currently unused alt page table map + * +---------------+ + * | MPPTDI | globaldata array + * +---------------+ + * | | + * | | per-cpu page table self-maps + * |KGDTDI[NKGDPDE]| + * +---------------+ + * | | + * | | + * | | + * | | general kernel page table pages + * | | + * | KPTDI[NKPDE] | + * +---------------+ Start of kernel memory + * | PTDPTDI | self-mapping of current pmap + * +---------------+ * * This typically places PTDPTDI at the index corresponding to VM address - * (0xc0000000 - 4M) = bfc00000, and that is where PTmap[] is based. + * (0xc0000000 - 4M) = bfc00000, and that is where PTmap[] is based for + * the self-mapped page table. PTD points to the self-mapped page + * directory itself and any indexes >= KPTDI will correspond to the + * common kernel page directory pages since all pmaps map the same ones. + * + * We no longer use APTmap or APTDpde (corresponding to APTDPTDI). This + * was a global page table map for accessing pmaps other than the current + * pmap. Instead we now implement an alternative pmap for EACH cpu + * using the ptds at KGDTDI. 
* * UMAXPTDI - highest inclusive ptd index for user space */ #define APTDPTDI (NPDEPG-1) /* alt ptd entry that points to APTD */ -#define MPPTDI (APTDPTDI-1) /* per cpu ptd entry */ -#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ +#define MPPTDI (APTDPTDI-1) /* globaldata array ptd entry */ +#define KGDTDI (MPPTDI-NKGDPDE) /* per-cpu page table mappings */ +#define KPTDI (KGDTDI-NKPDE) /* start of kernel virtual pde's */ #define PTDPTDI (KPTDI-1) /* ptd entry that points to ptd! */ #define UMAXPTDI (PTDPTDI-1) /* ptd entry for user space end */ -- 2.41.0