From 0f7a3396d20e0e3035c6fed2a9c3375ac6c5e8bb Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 17 Feb 2004 19:38:54 +0000 Subject: [PATCH] Introduce an MI cpu synchronization API, redo the SMP AP startup code, and start cleaning up deprecated IPI and clock code. Add an MMU/TLB page table invalidation API (pmap_inval.c) which properly synchronizes page table changes with other cpus in SMP environments. * removed (unused) gd_cpu_lockid * remove confusing invltlb() and friends, normalize use of cpu_invltlb() and smp_invltlb(). * redo the SMP AP startup code to make the system work better in situations where all APs do not startup. * add memory barrier API, cpu_mb1() and cpu_mb2(). * remove (obsolete, no longer used) old IPI hard and stat clock forwarding code. * add a cpu synchronization API which is capable of handling multiple simultaneous requests without deadlocking or livelocking. * major changes to the PMAP code to use the new invalidation API. * remove (unused) all_procs_ipi() and self_ipi(). * only use all_but_self_ipi() if it is known that all AP's started up, otherwise use a mask. 
* remove (obsolete, no longer used) BETTER_CLOCK code * remove (obsolete, no longer used) Xcpucheckstate IPI code Testing-by: David Rhodus and others --- sys/amd64/amd64/genassym.c | 3 +- sys/conf/files.i386 | 3 +- sys/cpu/i386/include/cpu.h | 4 +- sys/cpu/i386/include/cpufunc.h | 45 +-- sys/i386/apic/apic_vector.s | 70 +--- sys/i386/apic/mpapic.h | 22 +- sys/i386/i386/bios.c | 9 +- sys/i386/i386/db_interface.c | 8 +- sys/i386/i386/genassym.c | 3 +- sys/i386/i386/globals.s | 5 +- sys/i386/i386/machdep.c | 6 +- sys/i386/i386/mp_machdep.c | 466 ++++--------------------- sys/i386/i386/pmap.c | 316 ++++++++--------- sys/i386/i386/pmap_inval.c | 146 ++++++++ sys/i386/i386/vm_machdep.c | 9 +- sys/i386/include/cpu.h | 4 +- sys/i386/include/cpufunc.h | 45 +-- sys/i386/include/globaldata.h | 4 +- sys/i386/include/mpapic.h | 22 +- sys/i386/include/pmap_inval.h | 51 +++ sys/i386/include/smp.h | 14 +- sys/i386/include/smptests.h | 24 +- sys/i386/isa/apic_vector.s | 70 +--- sys/i386/isa/intr_machdep.h | 11 +- sys/kern/kern_shutdown.c | 6 +- sys/kern/lwkt_ipiq.c | 137 +++++--- sys/kern/lwkt_thread.c | 55 ++- sys/platform/pc32/apic/apic_vector.s | 70 +--- sys/platform/pc32/apic/mpapic.h | 22 +- sys/platform/pc32/i386/bios.c | 9 +- sys/platform/pc32/i386/db_interface.c | 8 +- sys/platform/pc32/i386/genassym.c | 3 +- sys/platform/pc32/i386/globals.s | 5 +- sys/platform/pc32/i386/machdep.c | 6 +- sys/platform/pc32/i386/mp_machdep.c | 466 ++++--------------------- sys/platform/pc32/i386/pmap.c | 316 ++++++++--------- sys/platform/pc32/i386/pmap_inval.c | 146 ++++++++ sys/platform/pc32/i386/vm_machdep.c | 9 +- sys/platform/pc32/include/globaldata.h | 4 +- sys/platform/pc32/include/mpapic.h | 22 +- sys/platform/pc32/include/pmap_inval.h | 51 +++ sys/platform/pc32/include/smp.h | 14 +- sys/platform/pc32/include/smptests.h | 24 +- sys/platform/pc32/isa/apic_vector.s | 70 +--- sys/platform/pc32/isa/intr_machdep.h | 11 +- sys/platform/vkernel/i386/genassym.c | 3 +- sys/sys/thread.h | 4 +- 
47 files changed, 1051 insertions(+), 1770 deletions(-) create mode 100644 sys/i386/i386/pmap_inval.c create mode 100644 sys/i386/include/pmap_inval.h create mode 100644 sys/platform/pc32/i386/pmap_inval.c create mode 100644 sys/platform/pc32/include/pmap_inval.h diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 28d17ab132..d8d1146365 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/amd64/amd64/Attic/genassym.c,v 1.1 2004/02/02 08:05:51 dillon Exp $ + * $DragonFly: src/sys/amd64/amd64/Attic/genassym.c,v 1.2 2004/02/17 19:38:53 dillon Exp $ */ #include @@ -215,7 +215,6 @@ ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); ASSYM(GD_COMMON_TSSD, offsetof(struct mdglobaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); -ASSYM(GD_CPU_LOCKID, offsetof(struct mdglobaldata, gd_cpu_lockid)); ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, gd_other_cpus)); ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); ASSYM(GD_CMAP1, offsetof(struct mdglobaldata, gd_CMAP1)); diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 2db14a6ec3..c1d9b65d4b 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -2,7 +2,7 @@ # files marked standard are always included. 
# # $FreeBSD: src/sys/conf/files.i386,v 1.307.2.38 2003/01/02 20:41:33 kan Exp $ -# $DragonFly: src/sys/conf/Attic/files.i386,v 1.16 2004/02/16 21:08:24 dillon Exp $ +# $DragonFly: src/sys/conf/Attic/files.i386,v 1.17 2004/02/17 19:38:53 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -213,6 +213,7 @@ i386/i386/nexus.c standard i386/i386/perfmon.c optional perfmon i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/pmap.c standard +i386/i386/pmap_inval.c standard i386/i386/procfs_machdep.c standard i386/i386/spinlock.s standard i386/i386/support.s standard diff --git a/sys/cpu/i386/include/cpu.h b/sys/cpu/i386/include/cpu.h index 82a0ee87c0..de37feac99 100644 --- a/sys/cpu/i386/include/cpu.h +++ b/sys/cpu/i386/include/cpu.h @@ -35,7 +35,7 @@ * * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $ - * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.13 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.14 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_CPU_H_ @@ -76,6 +76,8 @@ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) #define need_proftick() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) +#define need_ipiq() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_IPIQ) #define signotify() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) #define sigupcall() \ diff --git a/sys/cpu/i386/include/cpufunc.h b/sys/cpu/i386/include/cpufunc.h index c3134724b5..d1937b5364 100644 --- a/sys/cpu/i386/include/cpufunc.h +++ b/sys/cpu/i386/include/cpufunc.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.96.2.3 2002/04/28 22:50:54 dwmalone Exp $ - * $DragonFly: src/sys/cpu/i386/include/cpufunc.h,v 1.7 2003/08/26 21:42:18 rob Exp $ + * $DragonFly: src/sys/cpu/i386/include/cpufunc.h,v 1.8 2004/02/17 19:38:53 dillon Exp $ */ /* @@ -142,6 +142,18 @@ cpu_enable_intr(void) __asm __volatile("sti"); } +static __inline void +cpu_mb1(void) +{ + __asm __volatile("" : : : "memory"); +} + +static __inline void +cpu_mb2(void) +{ + __asm __volatile("cpuid" : : : "ax", "bx", "cx", "dx", "memory"); +} + #define HAVE_INLINE_FFS static __inline int @@ -313,35 +325,6 @@ cpu_invltlb(void) #endif } -/* - * Invalidate a patricular VA on all cpus - */ -static __inline void -invlpg(u_int addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); - smp_invltlb(); -} - -/* - * Invalidate the TLB on all cpus - */ -static __inline void -invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. 
- */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); - smp_invltlb(); -#ifdef SWTCH_OPTIM_STATS - ++tlb_flush_count; -#endif -} - #endif /* _KERNEL */ static __inline u_short @@ -635,8 +618,6 @@ void insb (u_int port, void *addr, size_t cnt); void insl (u_int port, void *addr, size_t cnt); void insw (u_int port, void *addr, size_t cnt); void invd (void); -void invlpg (u_int addr); -void invltlb (void); u_short inw (u_int port); u_int loadandclear (u_int *addr); void outb (u_int port, u_char data); diff --git a/sys/i386/apic/apic_vector.s b/sys/i386/apic/apic_vector.s index 27a45b3966..c3b1b6b00c 100644 --- a/sys/i386/apic/apic_vector.s +++ b/sys/i386/apic/apic_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/i386/apic/Attic/apic_vector.s,v 1.16 2004/02/12 06:57:46 dillon Exp $ + * $DragonFly: src/sys/i386/apic/Attic/apic_vector.s,v 1.17 2004/02/17 19:38:54 dillon Exp $ */ @@ -338,69 +338,6 @@ Xinvltlb: iret -#if 0 -#ifdef BETTER_CLOCK - -/* - * Executed by a CPU when it receives an Xcpucheckstate IPI from another CPU, - * - * - Stores current cpu state in checkstate_cpustate[cpuid] - * 0 == user, 1 == sys, 2 == intr - * - Stores current process in checkstate_curproc[cpuid] - * - * - Signals its receipt by setting bit cpuid in checkstate_probed_cpus. 
- * - * stack: 0->ds, 4->fs, 8->ebx, 12->eax, 16->eip, 20->cs, 24->eflags - */ - - .text - SUPERALIGN_TEXT - .globl Xcpucheckstate - .globl checkstate_cpustate - .globl checkstate_curproc - .globl checkstate_pc -Xcpucheckstate: - pushl %eax - pushl %ebx - pushl %ds /* save current data segment */ - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - movl $KPSEL, %eax - mov %ax, %fs - - movl $0, lapic_eoi /* End Of Interrupt to APIC */ - - movl $0, %ebx - movl 20(%esp), %eax - andl $3, %eax - cmpl $3, %eax - je 1f - testl $PSL_VM, 24(%esp) - jne 1f - incl %ebx /* system or interrupt */ -1: - movl PCPU(cpuid), %eax - movl %ebx, checkstate_cpustate(,%eax,4) - movl PCPU(curthread), %ebx - movl TD_PROC(%ebx),%ebx - movl %ebx, checkstate_curproc(,%eax,4) - movl 16(%esp), %ebx - movl %ebx, checkstate_pc(,%eax,4) - - lock /* checkstate_probed_cpus |= (1<gd_other_cpus && smp_started != 0) { + if (stopped_cpus != mycpu->gd_other_cpus) { db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", mycpu->gd_other_cpus, stopped_cpus); panic("stop_cpus() failed"); @@ -278,7 +278,7 @@ db_write_bytes(addr, size, data) } } - invltlb(); + cpu_invltlb(); } dst = (char *)addr; @@ -294,7 +294,7 @@ db_write_bytes(addr, size, data) if (ptep1) *ptep1 = oldmap1; - invltlb(); + cpu_invltlb(); } } diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 942c5be818..048ccdc2a2 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.33 2003/12/20 05:52:25 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.34 2004/02/17 19:38:53 dillon Exp $ */ #include @@ -201,7 +201,6 @@ ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); ASSYM(GD_COMMON_TSSD, offsetof(struct mdglobaldata, gd_common_tssd)); 
ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); -ASSYM(GD_CPU_LOCKID, offsetof(struct mdglobaldata, gd_cpu_lockid)); ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, gd_other_cpus)); ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); ASSYM(GD_CMAP1, offsetof(struct mdglobaldata, gd_CMAP1)); diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s index 9871179ed3..6fe54bab44 100644 --- a/sys/i386/i386/globals.s +++ b/sys/i386/i386/globals.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/globals.s,v 1.19 2003/12/20 05:52:25 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/globals.s,v 1.20 2004/02/17 19:38:53 dillon Exp $ */ #include @@ -73,7 +73,7 @@ * The BSP version of these get setup in locore.s and pmap.c, while * the AP versions are setup in mp_machdep.c. */ - .globl gd_cpuid, gd_cpu_lockid, gd_other_cpus + .globl gd_cpuid, gd_other_cpus .globl gd_ss_eflags, gd_intr_nesting_level .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 @@ -81,7 +81,6 @@ .set gd_cpuid,globaldata + GD_CPUID .set gd_private_tss,globaldata + GD_PRIVATE_TSS - .set gd_cpu_lockid,globaldata + GD_CPU_LOCKID .set gd_other_cpus,globaldata + GD_OTHER_CPUS .set gd_ss_eflags,globaldata + GD_SS_EFLAGS .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index b57ac1e50b..5e9400c56b 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.53 2004/02/14 19:58:50 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.54 2004/02/17 19:38:53 dillon Exp $ 
*/ #include "use_apm.h" @@ -1665,7 +1665,7 @@ physmap_done: * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; - invltlb(); + cpu_invltlb(); tmp = *(int *)ptr; /* @@ -1734,7 +1734,7 @@ physmap_done: } } *pte = 0; - invltlb(); + cpu_invltlb(); /* * XXX diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 134bb68243..1f36e746e2 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.21 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.22 2004/02/17 19:38:53 dillon Exp $ */ #include "opt_cpu.h" @@ -40,9 +40,6 @@ #include #include #include -#ifdef BETTER_CLOCK -#include -#endif #include /* cngetc() */ #include @@ -270,10 +267,6 @@ int cpu_num_to_apic_id[NAPICID]; int io_num_to_apic_id[NAPICID]; int apic_id_to_logical[NAPICID]; - -/* Bitmap of all available CPUs */ -u_int all_cpus; - /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; @@ -286,8 +279,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -int smp_started; /* has the system started? */ - /* * Local data and functions. */ @@ -295,6 +286,7 @@ int smp_started; /* has the system started? 
*/ static int mp_capable; static u_int boot_address; static u_int base_memory; +static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ static int picmode; /* 0: virtual wire mode, 1: PIC mode */ static mpfps_t mpfps; @@ -309,9 +301,12 @@ static void fix_mp_table(void); static void setup_apic_irq_mapping(void); static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); -static int start_ap(int logicalCpu, u_int boot_addr); +static int start_ap(struct mdglobaldata *gd, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); + /* * Calculate usable address in base memory for AP trampoline code. */ @@ -433,11 +428,14 @@ init_secondary(void) int x, myid = bootAP; u_int cr0; struct mdglobaldata *md; + struct privatespace *ps; - gdt_segs[GPRIV_SEL].ssd_base = (int) &CPU_prvspace[myid]; + ps = &CPU_prvspace[myid]; + + gdt_segs[GPRIV_SEL].ssd_base = (int)ps; gdt_segs[GPROC0_SEL].ssd_base = - (int) &CPU_prvspace[myid].mdglobaldata.gd_common_tss; - CPU_prvspace[myid].mdglobaldata.mi.gd_prvspace = &CPU_prvspace[myid]; + (int) &ps->mdglobaldata.gd_common_tss; + ps->mdglobaldata.mi.gd_prvspace = ps; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); @@ -455,7 +453,7 @@ init_secondary(void) gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - md = mdcpu; + md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); @@ -534,13 +532,13 @@ mp_enable(u_int boot_addr) /* turn on 4MB of V == P addressing so we can get to MP table */ *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME); - invltlb(); + cpu_invltlb(); /* examine the MP table for needed 
info, uses physical addresses */ x = mptable_pass2(); *(int *)PTD = 0; - invltlb(); + cpu_invltlb(); /* can't process default configs till the CPU APIC is pmapped */ if (x) @@ -572,14 +570,6 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#if 0 -#ifdef BETTER_CLOCK - /* install an inter-CPU IPI for reading processor state */ - setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif -#endif - /* install an inter-CPU IPI for IPIQ messaging */ setidt(XIPIQ_OFFSET, Xipiq, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -588,16 +578,6 @@ mp_enable(u_int boot_addr) setidt(XRENDEZVOUS_OFFSET, Xrendezvous, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#if 0 - /* install an inter-CPU IPI for forcing an additional software trap */ - setidt(XCPUAST_OFFSET, Xcpuast, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* install an inter-CPU IPI for interrupt forwarding */ - setidt(XFORWARD_IRQ_OFFSET, Xforward_irq, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif - /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -1969,6 +1949,7 @@ start_all_aps(u_int boot_addr) u_char mpbiosreason; u_long mpbioswarmvec; struct mdglobaldata *gd; + struct privatespace *ps; char *stack; uintptr_t kptbase; @@ -1989,16 +1970,13 @@ start_all_aps(u_int boot_addr) mpbiosreason = inb(CMOS_DATA); #endif - /* record BSP in CPU map */ - all_cpus = 1; - /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ kptbase = (uintptr_t)(void *)KPTphys; for (x = 0; x < NKPT; x++) PTD[x] = (pd_entry_t)(PG_V | PG_RW | ((kptbase + x * PAGE_SIZE) & PG_FRAME)); - invltlb(); + cpu_invltlb(); /* start each AP */ for (x = 1; x <= mp_naps; ++x) { @@ -2028,20 +2006,19 @@ start_all_aps(u_int boot_addr) gd = 
&CPU_prvspace[x].mdglobaldata; /* official location */ bzero(gd, sizeof(*gd)); - gd->mi.gd_prvspace = &CPU_prvspace[x]; + gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; /* prime data page for it to use */ mi_gdinit(&gd->mi, x); cpu_gdinit(gd, x); - gd->gd_cpu_lockid = x << 24; gd->gd_CMAP1 = &SMPpt[pg + 1]; gd->gd_CMAP2 = &SMPpt[pg + 2]; gd->gd_CMAP3 = &SMPpt[pg + 3]; gd->gd_PMAP1 = &SMPpt[pg + 4]; - gd->gd_CADDR1 = CPU_prvspace[x].CPAGE1; - gd->gd_CADDR2 = CPU_prvspace[x].CPAGE2; - gd->gd_CADDR3 = CPU_prvspace[x].CPAGE3; - gd->gd_PADDR1 = (unsigned *)CPU_prvspace[x].PPAGE1; + gd->gd_CADDR1 = ps->CPAGE1; + gd->gd_CADDR2 = ps->CPAGE2; + gd->gd_CADDR3 = ps->CPAGE3; + gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; gd->mi.gd_ipiq = (void *)kmem_alloc(kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); @@ -2056,12 +2033,12 @@ start_all_aps(u_int boot_addr) /* * Setup the AP boot stack */ - bootSTK = &CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE/2]; + bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; bootAP = x; /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ - if (!start_ap(x, boot_addr)) { + if (!start_ap(gd, boot_addr)) { printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ @@ -2073,12 +2050,13 @@ start_all_aps(u_int boot_addr) /* record its version info */ cpu_apic_versions[x] = cpu_apic_versions[0]; - - all_cpus |= (1 << x); /* record AP in CPU map */ } + /* set ncpus to 1 + highest logical cpu. 
Not all may have come up */ + ncpus = x; + /* build our map of 'other' CPUs */ - mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); + mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); mycpu->gd_ipiq = (void *)kmem_alloc(kernel_map, sizeof(lwkt_ipiq) * ncpus); bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); @@ -2176,24 +2154,20 @@ install_ap_tramp(u_int boot_addr) * before the AP goes into the LWKT scheduler's idle loop. */ static int -start_ap(int logical_cpu, u_int boot_addr) +start_ap(struct mdglobaldata *gd, u_int boot_addr) { int physical_cpu; int vector; - int cpus; u_long icr_lo, icr_hi; POSTCODE(START_AP_POST); /* get the PHYSICAL APIC ID# */ - physical_cpu = CPU_TO_ID(logical_cpu); + physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid); /* calculate the vector */ vector = (boot_addr >> 12) & 0xff; - /* used as a watchpoint to signal AP startup */ - cpus = ncpus; - /* Make sure the target cpu sees everything */ wbinvd(); @@ -2255,7 +2229,7 @@ start_ap(int logical_cpu, u_int boot_addr) /* wait for it to start, see ap_init() */ set_apic_timer(5000000);/* == 5 seconds */ while (read_apic_timer()) { - if (ncpus > cpus) + if (smp_startup_mask & (1 << gd->mi.gd_cpuid)) return 1; /* return SUCCESS */ } return 0; /* return FAILURE */ @@ -2263,16 +2237,21 @@ start_ap(int logical_cpu, u_int boot_addr) /* - * Flush the TLB on all other CPU's + * Lazy flush the TLB on all other CPU's. DEPRECATED. * - * XXX: Needs to handshake and wait for completion before proceding. + * If for some reason we were unable to start all cpus we cannot safely + * use broadcast IPIs. 
*/ void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started && invltlb_ok) + if (smp_startup_mask == smp_active_mask) { all_but_self_ipi(XINVLTLB_OFFSET); + } else { + selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET, + APIC_DELMODE_FIXED); + } #endif /* APIC_IO */ } @@ -2296,8 +2275,7 @@ smp_invltlb(void) int stop_cpus(u_int map) { - if (!smp_started) - return 0; + map &= smp_active_mask; /* send the Xcpustop IPI to all CPUs in map */ selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); @@ -2325,10 +2303,8 @@ stop_cpus(u_int map) int restart_cpus(u_int map) { - if (!smp_started) - return 0; - - started_cpus = map; /* signal other cpus to restart */ + /* signal other cpus to restart */ + started_cpus = map & smp_active_mask; while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */ /* spin */ ; @@ -2336,36 +2312,6 @@ restart_cpus(u_int map) return 1; } -int smp_active = 0; /* are the APs allowed to run? */ -SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); - -/* XXX maybe should be hw.ncpu */ -static int smp_cpus = 1; /* how many cpu's running */ -SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); - -int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ -SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); - -/* Warning: Do not staticize. Used from swtch.s */ -int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */ -SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, - &do_page_zero_idle, 0, ""); - -/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? 
*/ -int forward_irq_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW, - &forward_irq_enabled, 0, ""); - -/* Enable forwarding of a signal to a process running on a different CPU */ -static int forward_signal_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW, - &forward_signal_enabled, 0, ""); - -/* Enable forwarding of roundrobin to all other cpus */ -static int forward_roundrobin_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, - &forward_roundrobin_enabled, 0, ""); - /* * This is called once the mpboot code has gotten us properly relocated * and the MMU turned on, etc. ap_init() is actually the idle thread, @@ -2380,11 +2326,16 @@ ap_init(void) u_int apic_id; /* - * Signal the BSP that we have started up successfully by incrementing - * ncpus. Note that we do not hold the BGL yet. The BSP is waiting - * for our signal. + * Adjust smp_startup_mask to signal the BSP that we have started + * up successfully. Note that we do not yet hold the BGL. The BSP + * is waiting for our signal. + * + * We can't set our bit in smp_active_mask yet because we are holding + * interrupts physically disabled and remote cpus could deadlock + * trying to send us an IPI. */ - ++ncpus; + smp_startup_mask |= 1 << mycpu->gd_cpuid; + cpu_mb1(); /* * Get the MP lock so we can finish initializing. Note: we are @@ -2397,13 +2348,14 @@ ap_init(void) /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); + smp_active_mask |= 1 << mycpu->gd_cpuid; #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Build our map of 'other' CPUs. 
*/ - mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); + mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); printf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid); @@ -2431,18 +2383,6 @@ ap_init(void) /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); - /* - * Since we have the BGL if smp_cpus matches ncpus then we are - * the last AP to get to this point and we can enable IPI's, - * tlb shootdowns, freezes, and so forth. - */ - ++smp_cpus; - if (smp_cpus == ncpus) { - invltlb_ok = 1; - smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ - smp_active = 1; /* historic */ - } - /* * AP helper function for kernel memory support. This will create * a memory reserve for the AP that is necessary to avoid certain @@ -2464,288 +2404,6 @@ ap_init(void) rel_mplock(); } -#ifdef BETTER_CLOCK - -#define CHECKSTATE_USER 0 -#define CHECKSTATE_SYS 1 -#define CHECKSTATE_INTR 2 - -/* Do not staticize. Used from apic_vector.s */ -struct thread *checkstate_curtd[MAXCPU]; -int checkstate_cpustate[MAXCPU]; -u_long checkstate_pc[MAXCPU]; - -#define PC_TO_INDEX(pc, prof) \ - ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ - (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) - -#if 0 -static void -addupc_intr_forwarded(struct proc *p, int id, int *astmap) -{ - int i; - struct uprof *prof; - u_long pc; - - pc = checkstate_pc[id]; - prof = &p->p_stats->p_prof; - if (pc >= prof->pr_off && - (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { - if ((p->p_flag & P_OWEUPC) == 0) { - prof->pr_addr = pc; - prof->pr_ticks = 1; - p->p_flag |= P_OWEUPC; - } - *astmap |= (1 << id); - } -} -#endif - -#if 0 -static void -forwarded_statclock(int id, int pscnt, int *astmap) -{ -#if 0 - struct pstats *pstats; - long rss; - struct rusage *ru; - struct vmspace *vm; - int cpustate; - struct thread *td; -#ifdef GPROF - register struct gmonparam *g; - int i; -#endif - - t = checkstate_curtd[id]; - cpustate = checkstate_cpustate[id]; - - switch (cpustate) { 
- case CHECKSTATE_USER: - if (td->td_proc && td->td_proc->p_flag & P_PROFIL) - addupc_intr_forwarded(td->td_proc, id, astmap); - if (pscnt > 1) - return; - p->p_uticks++; - if (p->p_nice > NZERO) - cp_time[CP_NICE]++; - else - cp_time[CP_USER]++; - break; - case CHECKSTATE_SYS: -#ifdef GPROF - /* - * Kernel statistics are just like addupc_intr, only easier. - */ - g = &_gmonparam; - if (g->state == GMON_PROF_ON) { - i = checkstate_pc[id] - g->lowpc; - if (i < g->textsize) { - i /= HISTFRACTION * sizeof(*g->kcount); - g->kcount[i]++; - } - } -#endif - if (pscnt > 1) - return; - - if (!p) - cp_time[CP_IDLE]++; - else { - p->p_sticks++; - cp_time[CP_SYS]++; - } - break; - case CHECKSTATE_INTR: - default: -#ifdef GPROF - /* - * Kernel statistics are just like addupc_intr, only easier. - */ - g = &_gmonparam; - if (g->state == GMON_PROF_ON) { - i = checkstate_pc[id] - g->lowpc; - if (i < g->textsize) { - i /= HISTFRACTION * sizeof(*g->kcount); - g->kcount[i]++; - } - } -#endif - if (pscnt > 1) - return; - if (p) - p->p_iticks++; - cp_time[CP_INTR]++; - } - if (p != NULL) { - schedclock(p); - - /* Update resource usage integrals and maximums. */ - if ((pstats = p->p_stats) != NULL && - (ru = &pstats->p_ru) != NULL && - (vm = p->p_vmspace) != NULL) { - ru->ru_ixrss += pgtok(vm->vm_tsize); - ru->ru_idrss += pgtok(vm->vm_dsize); - ru->ru_isrss += pgtok(vm->vm_ssize); - rss = pgtok(vmspace_resident_count(vm)); - if (ru->ru_maxrss < rss) - ru->ru_maxrss = rss; - } - } -#endif -} -#endif - -#if 0 -void -forward_statclock(int pscnt) -{ - int map; - int id; - int i; - - /* Kludge. We don't yet have separate locks for the interrupts - * and the kernel. This means that we cannot let the other processors - * handle complex interrupts while inhibiting them from entering - * the kernel in a non-interrupt context. 
- * - * What we can do, without changing the locking mechanisms yet, - * is letting the other processors handle a very simple interrupt - * (wich determines the processor states), and do the main - * work ourself. - */ - - if (!smp_started || !invltlb_ok || cold || panicstr) - return; - - printf("forward_statclock\n"); - /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ - - map = mycpu->gd_other_cpus & ~stopped_cpus ; - checkstate_probed_cpus = 0; - if (map != 0) - selected_apic_ipi(map, - XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); - - i = 0; - while (checkstate_probed_cpus != map) { - /* spin */ - i++; - if (i == 100000) { -#ifdef BETTER_CLOCK_DIAGNOSTIC - printf("forward_statclock: checkstate %x\n", - checkstate_probed_cpus); -#endif - break; - } - } - - /* - * Step 2: walk through other processors processes, update ticks and - * profiling info. - */ - - map = 0; - for (id = 0; id < ncpus; id++) { - if (id == mycpu->gd_cpuid) - continue; - if (((1 << id) & checkstate_probed_cpus) == 0) - continue; - forwarded_statclock(id, pscnt, &map); - } - if (map != 0) - resched_cpus(map); -} -#endif - -#if 0 -void -forward_hardclock(int pscnt) -{ - int map; - int id; -#if 0 - struct proc *p; - struct pstats *pstats; -#endif - int i; - - /* Kludge. We don't yet have separate locks for the interrupts - * and the kernel. This means that we cannot let the other processors - * handle complex interrupts while inhibiting them from entering - * the kernel in a non-interrupt context. - * - * What we can do, without changing the locking mechanisms yet, - * is letting the other processors handle a very simple interrupt - * (wich determines the processor states), and do the main - * work ourself. 
- */ - - if (!smp_started || !invltlb_ok || cold || panicstr) - return; - - /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ - - map = mycpu->gd_other_cpus & ~stopped_cpus ; - checkstate_probed_cpus = 0; - if (map != 0) - selected_apic_ipi(map, - XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); - - i = 0; - while (checkstate_probed_cpus != map) { - /* spin */ - i++; - if (i == 100000) { -#ifdef BETTER_CLOCK_DIAGNOSTIC - printf("forward_hardclock: checkstate %x\n", - checkstate_probed_cpus); -#endif - break; - } - } - - /* - * Step 2: walk through other processors processes, update virtual - * timer and profiling timer. If stathz == 0, also update ticks and - * profiling info. - */ - - map = 0; - for (id = 0; id < ncpus; id++) { - if (id == mycpu->gd_cpuid) - continue; - if (((1 << id) & checkstate_probed_cpus) == 0) - continue; - printf("forward_hardclock\n"); -#if 0 - p = checkstate_curproc[id]; - if (p) { - pstats = p->p_stats; - if (checkstate_cpustate[id] == CHECKSTATE_USER && - timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { - psignal(p, SIGVTALRM); - map |= (1 << id); - } - if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { - psignal(p, SIGPROF); - map |= (1 << id); - } - } - if (stathz == 0) { - forwarded_statclock( id, pscnt, &map); - } -#endif - } - if (map != 0) - resched_cpus(map); -} -#endif - -#endif /* BETTER_CLOCK */ - #ifdef APIC_INTR_REORDER /* * Maintain mapping from softintr vector to isr bit in local apic. @@ -2763,6 +2421,8 @@ set_lapic_isrloc(int intr, int vector) #endif /* + * XXX DEPRECATED + * * All-CPU rendezvous. 
CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), * rendezvous again, execute the teardown function (if specified), and then @@ -2816,8 +2476,17 @@ smp_rendezvous(void (* setup_func)(void *), smp_rv_waiters[0] = 0; smp_rv_waiters[1] = 0; - /* signal other processors, which will enter the IPI with interrupts off */ - all_but_self_ipi(XRENDEZVOUS_OFFSET); + /* + * Signal other processors which will enter the IPI with interrupts + * disabled. We cannot safely use broadcast IPIs if some of our + * cpus failed to start. + */ + if (smp_startup_mask == smp_active_mask) { + all_but_self_ipi(XRENDEZVOUS_OFFSET); + } else { + selected_apic_ipi(smp_active_mask, XRENDEZVOUS_OFFSET, + APIC_DELMODE_FIXED); + } /* call executor function */ smp_rendezvous_action(); @@ -2829,5 +2498,6 @@ smp_rendezvous(void (* setup_func)(void *), void cpu_send_ipiq(int dcpu) { - selected_apic_ipi(1 << dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); + if ((1 << dcpu) & smp_active_mask) + selected_apic_ipi(1 << dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 737b99702b..0c9f31615d 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.29 2004/02/14 20:34:27 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.30 2004/02/17 19:38:53 dillon Exp $ */ /* @@ -105,6 +105,8 @@ #include #endif /* SMP || APIC_IO */ #include +#include +#include #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -135,8 +137,6 @@ #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= 
(v))) /* * Given a map and a machine independent protection code, @@ -196,11 +196,12 @@ static __inline void pmap_changebit (vm_page_t m, int bit, boolean_t setem); static void pmap_remove_all (vm_page_t m); static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); -static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, - vm_offset_t sva); -static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); +static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, + vm_offset_t sva, pmap_inval_info_t info); +static void pmap_remove_page (struct pmap *pmap, + vm_offset_t va, pmap_inval_info_t info); static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, - vm_offset_t va); + vm_offset_t va, pmap_inval_info_t info); static boolean_t pmap_testbit (vm_page_t m, int bit); static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); @@ -211,7 +212,7 @@ static int pmap_release_free_page (pmap_t pmap, vm_page_t p); static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex); static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); -static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); +static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static unsigned pdir4mb; @@ -439,7 +440,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ PTD[KPTDI] = (pd_entry_t)ptditmp; kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)ptditmp; - invltlb(); + cpu_invltlb(); #endif } #endif @@ -463,7 +464,7 @@ pmap_bootstrap(firstaddr, loadaddr) gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1; - invltlb(); + cpu_invltlb(); } #ifdef SMP @@ -590,47 +591,6 @@ pmap_track_modified(vm_offset_t va) return 0; } -static PMAP_INLINE void -invltlb_1pg(vm_offset_t va) -{ -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) { - 
invltlb(); - } else -#endif - { - invlpg(va); - } -} - -static __inline void -pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb_1pg(va); -#endif -} - -static __inline void -pmap_TLB_invalidate_all(pmap_t pmap) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invltlb(); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb(); -#endif -} - static unsigned * get_ptbase(pmap_t pmap) { @@ -647,12 +607,8 @@ get_ptbase(pmap_t pmap) if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t)(frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } return (unsigned *) APTmap; } @@ -737,13 +693,15 @@ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { unsigned *pte; - unsigned npte, opte; + unsigned npte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); npte = pa | PG_RW | PG_V | pgeflag; pte = (unsigned *)vtopte(va); - opte = *pte; *pte = npte; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -753,10 +711,13 @@ PMAP_INLINE void pmap_kremove(vm_offset_t va) { unsigned *pte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); pte = (unsigned *)vtopte(va); *pte = 0; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -798,16 +759,12 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count) pte = (unsigned *)vtopte(va); *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; m++; } #ifdef SMP - smp_invltlb(); + smp_invltlb(); /* XXX */ #endif } @@ -827,11 +784,7 @@ pmap_qremove(vm_offset_t va, int count) pte = (unsigned *)vtopte(va); *pte = 
0; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; } #ifdef SMP @@ -965,8 +918,9 @@ pmap_swapin_proc(struct proc *p) * drops to zero, then it decrements the wire count. */ static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { + pmap_inval_flush(info); while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; @@ -975,6 +929,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) /* * unmap the page table page */ + pmap_inval_add(info, pmap, -1); pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == @@ -984,7 +939,6 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) * take effect immediately. */ pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_TLB_invalidate(pmap, pteva); } if (pmap->pm_ptphint == m) @@ -995,7 +949,6 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) */ --m->wire_count; if (m->wire_count == 0) { - vm_page_flash(m); vm_page_busy(m); vm_page_free_zero(m); @@ -1007,11 +960,11 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) } static PMAP_INLINE int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { vm_page_unhold(m); if (m->hold_count == 0) - return _pmap_unwire_pte_hold(pmap, m); + return _pmap_unwire_pte_hold(pmap, m, info); else return 0; } @@ -1021,7 +974,8 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) * conditionally free the page, and manage the hold/wire counts. 
*/ static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte, + pmap_inval_info_t info) { unsigned ptepindex; if (va >= UPT_MIN_ADDRESS) @@ -1033,12 +987,13 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { + pmap_inval_flush(info); mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } - return pmap_unwire_pte_hold(pmap, mpte); + return pmap_unwire_pte_hold(pmap, mpte, info); } void @@ -1255,7 +1210,8 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va) if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; - invltlb(); + cpu_invltlb(); + smp_invltlb(); } /* @@ -1501,9 +1457,9 @@ pmap_collect(void) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ - static int -pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) +pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va, pmap_inval_info_t info) { pv_entry_t pv; int rtval; @@ -1524,17 +1480,14 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) rtval = 0; if (pv) { - - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } - splx(s); return rtval; } @@ -1566,20 +1519,23 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) * pmap_remove_pte: do the things to unmap a page in a process */ static int -pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) +pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, + pmap_inval_info_t info) { unsigned oldpte; vm_page_t m; + pmap_inval_add(info, pmap, va); oldpte = 
loadandclear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support - * PG_G. + * PG_G. XXX PG_G is disabled for SMP so don't worry about + * the SMP case. */ if (oldpte & PG_G) - invlpg(va); + cpu_invlpg(va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); @@ -1596,9 +1552,9 @@ pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); - return pmap_remove_entry(pmap, m, va); + return pmap_remove_entry(pmap, m, va, info); } else { - return pmap_unuse_pt(pmap, va, NULL); + return pmap_unuse_pt(pmap, va, NULL, info); } return 0; @@ -1613,7 +1569,7 @@ pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) * not kernel_pmap. */ static void -pmap_remove_page(struct pmap *pmap, vm_offset_t va) +pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) { unsigned *ptq; @@ -1624,14 +1580,13 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va) if (*pmap_pde(pmap, va) != 0) { ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - (void) pmap_remove_pte(pmap, ptq, va); - pmap_TLB_invalidate(pmap, va); + pmap_remove_pte(pmap, ptq, va, info); } } } /* - * pmap_remopve: + * pmap_remove: * * Remove the given range of addresses from the specified map. * @@ -1648,7 +1603,7 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; - int anyvalid; + struct pmap_inval_info info; if (pmap == NULL) return; @@ -1656,6 +1611,8 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) if (pmap->pm_stats.resident_count == 0) return; + pmap_inval_init(&info); + /* * special handling of removing one page. 
a very * common operation and easy to short circuit some @@ -1663,12 +1620,11 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) */ if (((sva + PAGE_SIZE) == eva) && (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { - pmap_remove_page(pmap, sva); + pmap_remove_page(pmap, sva, &info); + pmap_inval_flush(&info); return; } - anyvalid = 0; - /* * Get a local virtual address for the mappings that are being * worked with. @@ -1690,9 +1646,9 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid++; continue; } @@ -1712,22 +1668,16 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) pdnxt = eindex; } - for ( ;sindex != pdnxt; sindex++) { + for (; sindex != pdnxt; sindex++) { vm_offset_t va; - if (ptbase[sindex] == 0) { + if (ptbase[sindex] == 0) continue; - } va = i386_ptob(sindex); - - anyvalid++; - if (pmap_remove_pte(pmap, - ptbase + sindex, va)) + if (pmap_remove_pte(pmap, ptbase + sindex, va, &info)) break; } } - - if (anyvalid) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1742,8 +1692,9 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) static void pmap_remove_all(vm_page_t m) { - pv_entry_t pv; + struct pmap_inval_info info; unsigned *pte, tpte; + pv_entry_t pv; int s; #if defined(PMAP_DIAGNOSTIC) @@ -1756,11 +1707,13 @@ pmap_remove_all(vm_page_t m) } #endif + pmap_inval_init(&info); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = loadandclear(pte); if (tpte & PG_W) @@ -1783,18 +1736,16 @@ pmap_remove_all(vm_page_t m) if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } - 
pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - splx(s); + pmap_inval_flush(&info); } /* @@ -1812,7 +1763,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) unsigned *ptbase; vm_offset_t pdnxt, ptpaddr; vm_pindex_t sindex, eindex; - int anychanged; + pmap_inval_info info; if (pmap == NULL) return; @@ -1825,7 +1776,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + pmap_inval_init(&info); ptbase = get_ptbase(pmap); @@ -1840,9 +1791,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anychanged++; continue; } @@ -1862,6 +1813,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) unsigned pbits; vm_page_t m; + /* XXX this isn't optimal */ + pmap_inval_add(&info, pmap, i386_ptob(sindex)); pbits = ptbase[sindex]; if (pbits & PG_MANAGED) { @@ -1885,12 +1838,10 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (pbits != ptbase[sindex]) { ptbase[sindex] = pbits; - anychanged = 1; } } } - if (anychanged) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1914,6 +1865,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_paddr_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; + pmap_inval_info info; if (pmap == NULL) return; @@ -1934,26 +1886,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot, if (va < UPT_MIN_ADDRESS) { mpte = pmap_allocpte(pmap, va); } -#if 0 && defined(PMAP_DIAGNOSTIC) - else { - vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); - if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { - panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], origpte, va); - } - if (smp_active) { - pdeaddr = (vm_offset_t *) IdlePTDS[cpuid]; - if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { - if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) - printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); - printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr); - panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], newpte, origpte, va); - } - } - } -#endif + pmap_inval_init(&info); pte = pmap_pte(pmap, va); /* @@ -1965,6 +1899,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; + pmap_inval_add(&info, pmap, va); /* XXX non-optimal */ origpte = *(vm_offset_t *)pte; opa = origpte & PG_FRAME; @@ -2001,16 +1936,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { - if ((origpte & PG_RW) == 0) { + if ((origpte & PG_RW) == 0) *pte |= PG_RW; -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } + pmap_inval_flush(&info); return; } @@ -2034,7 +1962,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if (opa) { int err; - err = pmap_remove_pte(pmap, pte, va); + err = pmap_remove_pte(pmap, pte, va, &info); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } @@ -2076,16 +2004,8 @@ validate: */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; - /*if (origpte)*/ { -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & 
mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } } + pmap_inval_flush(&info); } /* @@ -2104,6 +2024,9 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { unsigned *pte; vm_paddr_t pa; + pmap_inval_info info; + + pmap_inval_init(&info); /* * In the case that a page table page is not @@ -2160,7 +2083,7 @@ retry: pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) - pmap_unwire_pte_hold(pmap, mpte); + pmap_unwire_pte_hold(pmap, mpte, &info); return 0; } @@ -2273,7 +2196,8 @@ retry: ptepindex += 1; } vm_page_flag_set(p, PG_MAPPED); - invltlb(); + cpu_invltlb(); + smp_invltlb(); return; } @@ -2360,7 +2284,6 @@ retry: } } } - return; } /* @@ -2477,9 +2400,22 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) /* * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. + * invalidate TLB. However, in an SMP environment we must use + * a locked bus cycle to update the pte (if we are not using + * the pmap_inval_*() API that is)... it's ok to do this for simple + * wiring changes. 
*/ - pmap_pte_set_w(pte, wired); +#ifdef SMP + if (wired) + atomic_set_int(pte, PG_W); + else + atomic_clear_int(pte, PG_W); +#else + if (wired) + atomic_set_int_nonlocked(pte, PG_W); + else + atomic_clear_int_nonlocked(pte, PG_W); +#endif } @@ -2495,6 +2431,7 @@ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { + pmap_inval_info info; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; @@ -2512,13 +2449,12 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } + pmap_inval_init(&info); + pmap_inval_add(&info, dst_pmap, -1); + pmap_inval_add(&info, src_pmap, -1); for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; @@ -2588,7 +2524,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { - pmap_unwire_pte_hold(dst_pmap, dstmpte); + pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); } if (dstmpte->hold_count >= srcmpte->hold_count) break; @@ -2598,6 +2534,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, dst_pte++; } } + pmap_inval_flush(&info); } /* @@ -2803,6 +2740,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pv_entry_t pv, npv; int s; vm_page_t m; + pmap_inval_info info; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { @@ -2811,6 +2749,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } #endif + pmap_inval_init(&info); s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; @@ -2826,6 +2765,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) #else pte = 
pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; /* @@ -2861,11 +2801,11 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } + pmap_inval_flush(&info); splx(s); - pmap_TLB_invalidate_all(pmap); } /* @@ -2921,6 +2861,7 @@ pmap_testbit(vm_page_t m, int bit) static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { + struct pmap_inval_info info; pv_entry_t pv; unsigned *pte; int s; @@ -2928,6 +2869,7 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; + pmap_inval_init(&info); s = splvm(); /* @@ -2950,11 +2892,21 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) } #endif + /* + * Careful here. We can use a locked bus instruction to + * clear PG_A or PG_M safely but we need to synchronize + * with the target cpus when we mess with PG_RW. 
+ */ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (bit == PG_RW) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); if (setem) { - *(int *)pte |= bit; - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); +#ifdef SMP + atomic_set_int(pte, bit); +#else + atomic_set_int_nonlocked(pte, bit); +#endif } else { vm_offset_t pbits = *(vm_offset_t *)pte; if (pbits & bit) { @@ -2962,14 +2914,22 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) if (pbits & PG_M) { vm_page_dirty(m); } - *(int *)pte = pbits & ~(PG_M|PG_RW); +#ifdef SMP + atomic_clear_int(pte, PG_M|PG_RW); +#else + atomic_clear_int_nonlocked(pte, PG_M|PG_RW); +#endif } else { - *(int *)pte = pbits & ~bit; +#ifdef SMP + atomic_clear_int(pte, bit); +#else + atomic_clear_int_nonlocked(pte, bit); +#endif } - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } } } + pmap_inval_flush(&info); splx(s); } @@ -3038,10 +2998,11 @@ pmap_ts_referenced(vm_page_t m) pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { - *pte &= ~PG_A; - - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - +#ifdef SMP + atomic_clear_int(pte, PG_A); +#else + atomic_clear_int_nonlocked(pte, PG_A); +#endif rtval++; if (rtval > 4) { break; @@ -3148,7 +3109,8 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size) tmpva += PAGE_SIZE; pa += PAGE_SIZE; } - invltlb(); + cpu_invltlb(); + smp_invltlb(); return ((void *)(va + offset)); } diff --git a/sys/i386/i386/pmap_inval.c b/sys/i386/i386/pmap_inval.c new file mode 100644 index 0000000000..20a1006850 --- /dev/null +++ b/sys/i386/i386/pmap_inval.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/i386/i386/Attic/pmap_inval.c,v 1.1 2004/02/17 19:38:53 dillon Exp $ + */ + +/* + * pmap invalidation support code. Certain hardware requirements must + * be dealt with when manipulating page table entries and page directory + * entries within a pmap. In particular, we cannot safely manipulate + * page tables which are in active use by another cpu (even if it is + * running in userland) for two reasons: First, TLB writebacks will + * race against our own modifications and tests. Second, even if we + * were to use a bus-locked instruction we can still screw up the + * target cpu's instruction pipeline due to Intel cpu errata. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#if defined(SMP) || defined(APIC_IO) +#include +#include +#endif /* SMP || APIC_IO */ +#include +#include +#include + +#ifdef SMP + +static void +_cpu_invltlb(void *dummy) +{ + cpu_invltlb(); +} + +static void +_cpu_invl1pg(void *data) +{ + cpu_invlpg(data); +} + +#endif + +/* + * Initialize for add or flush + */ +void +pmap_inval_init(pmap_inval_info_t info) +{ + info->pir_flags = 0; +} + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. + */ +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ +#ifdef SMP + if ((info->pir_flags & PIRF_CPUSYNC) == 0) { + info->pir_flags |= PIRF_CPUSYNC; + info->pir_cpusync.cs_run_func = NULL; + info->pir_cpusync.cs_fin1_func = NULL; + info->pir_cpusync.cs_fin2_func = NULL; + lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync); + } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { + lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); + } +#else + if (pmap->pm_active == 0) + return; +#endif + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +/* + * Synchronize changes with target cpus. 
+ */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ +#ifdef SMP + if (info->pir_flags & PIRF_CPUSYNC) + lwkt_cpusync_finish(&info->pir_cpusync); +#else + if (info->pir_flags & PIRF_INVLTLB) + cpu_invltlb(); + else if (info->pir_flags & PIRF_INVL1PG) + cpu_invlpg(info->pir_cpusync.cs_data); +#endif + info->pir_flags = 0; +} + diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 58480a7f41..12fc0855c6 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.26 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.27 2004/02/17 19:38:53 dillon Exp $ */ #include "use_npx.h" @@ -417,16 +417,15 @@ void cpu_reset() { #ifdef SMP - if (smp_active == 0) { + if (smp_active_mask == 1) { cpu_reset_real(); /* NOTREACHED */ } else { - u_int map; int cnt; printf("cpu_reset called on cpu#%d\n",mycpu->gd_cpuid); - map = mycpu->gd_other_cpus & ~ stopped_cpus; + map = mycpu->gd_other_cpus & ~stopped_cpus & smp_active_mask; if (map != 0) { printf("cpu_reset: Stopping other CPUs\n"); @@ -502,7 +501,7 @@ cpu_reset_real() bzero((caddr_t) PTD, PAGE_SIZE); /* "good night, sweet prince .... 
" */ - invltlb(); + cpu_invltlb(); /* NOTREACHED */ while(1); } diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index 159b9d4bcf..b91f5e5d41 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -35,7 +35,7 @@ * * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $ - * $DragonFly: src/sys/i386/include/Attic/cpu.h,v 1.13 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/cpu.h,v 1.14 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_CPU_H_ @@ -76,6 +76,8 @@ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) #define need_proftick() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) +#define need_ipiq() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_IPIQ) #define signotify() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) #define sigupcall() \ diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 4eb501e08d..bbf2a2fc46 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.96.2.3 2002/04/28 22:50:54 dwmalone Exp $ - * $DragonFly: src/sys/i386/include/Attic/cpufunc.h,v 1.7 2003/08/26 21:42:18 rob Exp $ + * $DragonFly: src/sys/i386/include/Attic/cpufunc.h,v 1.8 2004/02/17 19:38:53 dillon Exp $ */ /* @@ -142,6 +142,18 @@ cpu_enable_intr(void) __asm __volatile("sti"); } +static __inline void +cpu_mb1(void) +{ + __asm __volatile("" : : : "memory"); +} + +static __inline void +cpu_mb2(void) +{ + __asm __volatile("cpuid" : : : "ax", "bx", "cx", "dx", "memory"); +} + #define HAVE_INLINE_FFS static __inline int @@ -313,35 +325,6 @@ cpu_invltlb(void) #endif } -/* - * Invalidate a patricular VA on all cpus - */ -static __inline void -invlpg(u_int addr) -{ - __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); - smp_invltlb(); -} - -/* - * Invalidate the TLB on all cpus - */ -static __inline void -invltlb(void) -{ - u_int temp; - /* - * This should be implemented as load_cr3(rcr3()) when load_cr3() - * is inlined. - */ - __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) - : : "memory"); - smp_invltlb(); -#ifdef SWTCH_OPTIM_STATS - ++tlb_flush_count; -#endif -} - #endif /* _KERNEL */ static __inline u_short @@ -635,8 +618,6 @@ void insb (u_int port, void *addr, size_t cnt); void insl (u_int port, void *addr, size_t cnt); void insw (u_int port, void *addr, size_t cnt); void invd (void); -void invlpg (u_int addr); -void invltlb (void); u_short inw (u_int port); u_int loadandclear (u_int *addr); void outb (u_int port, u_char data); diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h index cf499cf8b0..02a339d8c3 100644 --- a/sys/i386/include/globaldata.h +++ b/sys/i386/include/globaldata.h @@ -28,7 +28,7 @@ * should not include this file. 
* * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/i386/include/Attic/globaldata.h,v 1.21 2003/12/20 05:52:27 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/globaldata.h,v 1.22 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_GLOBALDATA_H_ @@ -72,7 +72,7 @@ struct mdglobaldata { int gd_idelayed; /* delayed software ints */ int gd_currentldt; int gd_private_tss; - u_int gd_cpu_lockid; + u_int unused001; u_int gd_other_cpus; u_int gd_ss_eflags; pt_entry_t *gd_CMAP1; diff --git a/sys/i386/include/mpapic.h b/sys/i386/include/mpapic.h index ec5e694288..7f810de9cf 100644 --- a/sys/i386/include/mpapic.h +++ b/sys/i386/include/mpapic.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/mpapic.h,v 1.14.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/i386/include/Attic/mpapic.h,v 1.4 2003/08/07 21:17:22 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/mpapic.h,v 1.5 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_MPAPIC_H_ @@ -74,33 +74,15 @@ selected_procs_ipi(int targetMap, int vector) return selected_apic_ipi(targetMap, vector, APIC_DELMODE_FIXED); } -/* - * send an IPI INTerrupt containing 'vector' to all CPUs, including myself - */ -static __inline int -all_procs_ipi(int vector) -{ - return apic_ipi(APIC_DEST_ALLISELF, vector, APIC_DELMODE_FIXED); -} - /* * send an IPI INTerrupt containing 'vector' to all CPUs EXCEPT myself */ static __inline int all_but_self_ipi(int vector) { - if (ncpus <= 1) + if (smp_active_mask == 1) return 0; return apic_ipi(APIC_DEST_ALLESELF, vector, APIC_DELMODE_FIXED); } -/* - * send an IPI INTerrupt containing 'vector' to myself - */ -static __inline int -self_ipi(int vector) -{ - return apic_ipi(APIC_DEST_SELF, vector, APIC_DELMODE_FIXED); -} - #endif /* _MACHINE_MPAPIC_H */ diff --git a/sys/i386/include/pmap_inval.h b/sys/i386/include/pmap_inval.h new file mode 100644 index 0000000000..7ee58961d8 --- /dev/null +++ 
b/sys/i386/include/pmap_inval.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/i386/include/Attic/pmap_inval.h,v 1.1 2004/02/17 19:38:54 dillon Exp $ + */ + +#ifndef _MACHINE_PMAP_INVAL_H_ +#define _MACHINE_PMAP_INVAL_H_ + +typedef struct pmap_inval_info { + int pir_flags; + struct lwkt_cpusync pir_cpusync; +} pmap_inval_info; + +typedef pmap_inval_info *pmap_inval_info_t; + +#define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ +#define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ +#define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ + +#ifdef _KERNEL + +void pmap_inval_init(pmap_inval_info_t); +void pmap_inval_add(pmap_inval_info_t, pmap_t, vm_offset_t); +void pmap_inval_flush(pmap_inval_info_t); + +#endif + +#endif diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 369c4e17d1..882d4f73e6 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ - * $DragonFly: src/sys/i386/include/Attic/smp.h,v 1.8 2003/11/03 02:08:33 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/smp.h,v 1.9 2004/02/17 19:38:54 dillon Exp $ * */ @@ -81,7 +81,6 @@ struct apic_intmapinfo { int redirindex; }; extern struct apic_intmapinfo int_to_apicintpin[]; -extern u_int all_cpus; extern struct pcb stoppcbs[]; /* functions in mp_machdep.c */ @@ -109,10 +108,6 @@ void init_secondary (void); int stop_cpus (u_int); void ap_init (void); int restart_cpus (u_int); -#ifdef BETTER_CLOCK -void forward_statclock (int pscnt); -void forward_hardclock (int pscnt); -#endif /* BETTER_CLOCK */ void forward_signal (struct proc *); void forward_roundrobin (void); #ifdef APIC_INTR_REORDER @@ -151,15 +146,12 @@ void u_sleep (int); void cpu_send_ipiq (int); /* global data in init_smp.c */ -extern int invltlb_ok; -extern int smp_active; -extern int smp_started; -extern volatile int smp_idle_loops; +extern cpumask_t 
smp_active_mask; #endif /* !LOCORE */ #else /* !SMP && !APIC_IO */ -#define smp_active 0 /* smp_active always 0 on UP machines */ +#define smp_active_mask 1 /* smp_active_mask always 1 on UP machines */ #endif diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index 19fe59116e..4397a6525f 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/smptests.h,v 1.33.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/i386/include/Attic/smptests.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/smptests.h,v 1.4 2004/02/17 19:38:54 dillon Exp $ */ #ifndef _MACHINE_SMPTESTS_H_ @@ -34,28 +34,6 @@ * Various 'tests in progress' and configuration parameters. */ - -/* - * Tor's clock improvements. - * - * When the giant kernel lock disappears, a different strategy should - * probably be used, thus this patch can only be considered a temporary - * measure. - * - * This patch causes (NCPU-1)*(128+100) extra IPIs per second. - * During profiling, the number is (NCPU-1)*(1024+100) extra IPIs/s - * in addition to extra IPIs due to forwarding ASTs to other CPUs. - * - * Having a shared AST flag in an SMP configuration is wrong, and I've - * just kludged around it, based upon the kernel lock blocking other - * processors from entering the kernel while handling an AST for one - * processor. When the giant kernel lock disappers, this kludge breaks. - * - * -- Tor - */ -#define BETTER_CLOCK - - /* * Control the "giant lock" pushdown by logical steps. 
*/ diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index 59264d3f74..1ae3fe4fee 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/i386/isa/Attic/apic_vector.s,v 1.16 2004/02/12 06:57:46 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/apic_vector.s,v 1.17 2004/02/17 19:38:54 dillon Exp $ */ @@ -338,69 +338,6 @@ Xinvltlb: iret -#if 0 -#ifdef BETTER_CLOCK - -/* - * Executed by a CPU when it receives an Xcpucheckstate IPI from another CPU, - * - * - Stores current cpu state in checkstate_cpustate[cpuid] - * 0 == user, 1 == sys, 2 == intr - * - Stores current process in checkstate_curproc[cpuid] - * - * - Signals its receipt by setting bit cpuid in checkstate_probed_cpus. - * - * stack: 0->ds, 4->fs, 8->ebx, 12->eax, 16->eip, 20->cs, 24->eflags - */ - - .text - SUPERALIGN_TEXT - .globl Xcpucheckstate - .globl checkstate_cpustate - .globl checkstate_curproc - .globl checkstate_pc -Xcpucheckstate: - pushl %eax - pushl %ebx - pushl %ds /* save current data segment */ - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - movl $KPSEL, %eax - mov %ax, %fs - - movl $0, lapic_eoi /* End Of Interrupt to APIC */ - - movl $0, %ebx - movl 20(%esp), %eax - andl $3, %eax - cmpl $3, %eax - je 1f - testl $PSL_VM, 24(%esp) - jne 1f - incl %ebx /* system or interrupt */ -1: - movl PCPU(cpuid), %eax - movl %ebx, checkstate_cpustate(,%eax,4) - movl PCPU(curthread), %ebx - movl TD_PROC(%ebx),%ebx - movl %ebx, checkstate_curproc(,%eax,4) - movl 16(%esp), %ebx - movl %ebx, checkstate_pc(,%eax,4) - - lock /* checkstate_probed_cpus |= (1< #include #include -#include /* smp_active, cpuid */ +#include /* smp_active_mask, cpuid */ #include @@ -230,7 +230,7 @@ boot(int howto) howto |= shutdown_howto; #ifdef SMP - if (smp_active) { + if (smp_active_mask > 
1) { printf("boot() called on cpu#%d\n", mycpu->gd_cpuid); } #endif diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c index cc9faa0730..340bd16d41 100644 --- a/sys/kern/lwkt_ipiq.c +++ b/sys/kern/lwkt_ipiq.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.2 2004/02/15 05:15:25 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.3 2004/02/17 19:38:49 dillon Exp $ */ /* @@ -82,8 +82,9 @@ #endif #ifdef SMP -static __int64_t ipiq_count = 0; -static __int64_t ipiq_fifofull = 0; +static __int64_t ipiq_count; +static __int64_t ipiq_fifofull; +static __int64_t ipiq_cscount; #endif #ifdef _KERNEL @@ -91,6 +92,7 @@ static __int64_t ipiq_fifofull = 0; #ifdef SMP SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, ""); +SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, ""); #endif #endif @@ -254,8 +256,11 @@ again: } } if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) { - if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) - goto again; + if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) { + if (gd->gd_curthread->td_cscount == 0) + goto again; + need_ipiq(); + } } } @@ -278,8 +283,11 @@ again: } } if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) { - if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) - goto again; + if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) { + if (gd->gd_curthread->td_cscount == 0) + goto again; + need_ipiq(); + } } } #endif @@ -304,6 +312,27 @@ lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame) return(wi != ip->ip_windex); } +#else + +/* + * !SMP dummy routines + */ + +int +lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg) +{ + panic("lwkt_send_ipiq: UP box! 
(%d,%p,%p)", target->gd_cpuid, func, arg); + return(0); /* NOT REACHED */ +} + +void +lwkt_wait_ipiq(globaldata_t target, int seq) +{ + panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq); +} + +#endif + /* * CPU Synchronization Support * @@ -375,64 +404,97 @@ lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data) void lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll) { + globaldata_t gd = mycpu; + poll->cs_count = 0; poll->cs_mask = mask; - poll->cs_maxcount = lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus, - (ipifunc_t)lwkt_cpusync_remote1, poll); - if (mask & (1 << mycpu->gd_cpuid)) { +#ifdef SMP + poll->cs_maxcount = lwkt_send_ipiq_mask( + mask & gd->gd_other_cpus & smp_active_mask, + (ipifunc_t)lwkt_cpusync_remote1, poll); +#endif + if (mask & (1 << gd->gd_cpuid)) { if (poll->cs_run_func) poll->cs_run_func(poll); } - while (poll->cs_count != poll->cs_maxcount) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + if (poll->cs_maxcount) { + ++ipiq_cscount; + ++gd->gd_curthread->td_cscount; + while (poll->cs_count != poll->cs_maxcount) { + crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } } +#endif } void lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll) { + globaldata_t gd = mycpu; + int count; + mask &= ~poll->cs_mask; poll->cs_mask |= mask; - poll->cs_maxcount += lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus, - (ipifunc_t)lwkt_cpusync_remote1, poll); - if (mask & (1 << mycpu->gd_cpuid)) { +#ifdef SMP + count = lwkt_send_ipiq_mask( + mask & gd->gd_other_cpus & smp_active_mask, + (ipifunc_t)lwkt_cpusync_remote1, poll); +#endif + if (mask & (1 << gd->gd_cpuid)) { if (poll->cs_run_func) poll->cs_run_func(poll); } - while (poll->cs_count != poll->cs_maxcount) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + poll->cs_maxcount += count; + if (poll->cs_maxcount) { + if (poll->cs_maxcount == count) + ++gd->gd_curthread->td_cscount; + while (poll->cs_count != poll->cs_maxcount) { + 
crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } } +#endif } /* * Finish synchronization with a set of target cpus. The target cpus will * execute cs_fin1_func(poll) prior to this function returning, and will * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN. + * + * If cs_maxcount is non-zero then we are mastering a cpusync with one or + * more remote cpus and must account for it in our thread structure. */ void lwkt_cpusync_finish(lwkt_cpusync_t poll) { - int count; + globaldata_t gd = mycpu; - count = -(poll->cs_maxcount + 1); poll->cs_count = -1; - if (poll->cs_mask & (1 << mycpu->gd_cpuid)) { + if (poll->cs_mask & (1 << gd->gd_cpuid)) { if (poll->cs_fin1_func) poll->cs_fin1_func(poll); if (poll->cs_fin2_func) poll->cs_fin2_func(poll->cs_data); } - while (poll->cs_count != count) { - crit_enter(); - lwkt_process_ipiq(); - crit_exit(); +#ifdef SMP + if (poll->cs_maxcount) { + while (poll->cs_count != -(poll->cs_maxcount + 1)) { + crit_enter(); + lwkt_process_ipiq(); + crit_exit(); + } + --gd->gd_curthread->td_cscount; } +#endif } +#ifdef SMP + /* * helper IPI remote messaging function. * @@ -487,23 +549,4 @@ lwkt_cpusync_remote2(lwkt_cpusync_t poll) } } -#else - -/* - * !SMP dummy routines - */ - -int -lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg) -{ - panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg); - return(0); /* NOT REACHED */ -} - -void -lwkt_wait_ipiq(globaldata_t target, int seq) -{ - panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq); -} - #endif diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 37b2022ea7..f8eb53c260 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.54 2004/02/15 02:14:41 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.55 2004/02/17 19:38:49 dillon Exp $ */ /* @@ -31,11 +31,6 @@ * thread scheduler, which means that generally speaking we only need * to use a critical section to avoid problems. Foreign thread * scheduling is queued via (async) IPIs. - * - * NOTE: on UP machines smp_active is defined to be 0. On SMP machines - * smp_active is 0 prior to SMP activation, then it is 1. The LWKT module - * uses smp_active to optimize UP builds and to avoid sending IPIs during - * early boot (primarily interrupt and network thread initialization). */ #ifdef _KERNEL @@ -88,6 +83,9 @@ #endif static int untimely_switch = 0; +#ifdef INVARIANTS +static int panic_on_cscount = 0; +#endif static __int64_t switch_count = 0; static __int64_t preempt_hit = 0; static __int64_t preempt_miss = 0; @@ -96,6 +94,9 @@ static __int64_t preempt_weird = 0; #ifdef _KERNEL SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, ""); +#ifdef INVARIANTS +SYSCTL_INT(_lwkt, OID_AUTO, panic_on_cscount, CTLFLAG_RW, &panic_on_cscount, 0, ""); +#endif SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, ""); SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, ""); @@ -263,13 +264,19 @@ lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd) td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT; lwkt_initport(&td->td_msgport, td); pmap_init_thread(td); - if (smp_active == 0 || gd == mycpu) { +#ifdef SMP + if (gd == mycpu) { crit_enter(); TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); crit_exit(); } else { lwkt_send_ipiq(gd, lwkt_init_thread_remote, td); } +#else + crit_enter(); + TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); + crit_exit(); +#endif } #endif /* _KERNEL */ @@ -412,6 +419,14 @@ lwkt_switch(void) * actual value of mp_lock is 
not stable). */ mpheld = MP_LOCK_HELD(); +#ifdef INVARIANTS + if (td->td_cscount) { + printf("Diagnostic: attempt to switch while mastering cpusync: %p\n", + td); + if (panic_on_cscount) + panic("switching while mastering cpusync"); + } +#endif #endif if ((ntd = td->td_preempted) != NULL) { /* @@ -796,16 +811,23 @@ lwkt_schedule(thread_t td) TAILQ_REMOVE(&w->wa_waitq, td, td_threadq); --w->wa_count; td->td_wait = NULL; - if (smp_active == 0 || td->td_gd == mycpu) { +#ifdef SMP + if (td->td_gd == mycpu) { _lwkt_enqueue(td); - if (td->td_preemptable) { + if (td->td_preemptable) td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ - } else if (_lwkt_wantresched(td, curthread)) { + else if (_lwkt_wantresched(td, curthread)) need_resched(); - } } else { lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td); } +#else + _lwkt_enqueue(td); + if (td->td_preemptable) + td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ + else if (_lwkt_wantresched(td, curthread)) + need_resched(); +#endif lwkt_reltoken(&w->wa_token); } else { lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td); @@ -817,7 +839,8 @@ lwkt_schedule(thread_t td) * do not own the thread there might be a race but the * target cpu will deal with it. 
*/ - if (smp_active == 0 || td->td_gd == mycpu) { +#ifdef SMP + if (td->td_gd == mycpu) { _lwkt_enqueue(td); if (td->td_preemptable) { td->td_preemptable(td, TDPRI_CRIT); @@ -827,6 +850,14 @@ lwkt_schedule(thread_t td) } else { lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td); } +#else + _lwkt_enqueue(td); + if (td->td_preemptable) { + td->td_preemptable(td, TDPRI_CRIT); + } else if (_lwkt_wantresched(td, curthread)) { + need_resched(); + } +#endif } } crit_exit(); diff --git a/sys/platform/pc32/apic/apic_vector.s b/sys/platform/pc32/apic/apic_vector.s index f9f158f19b..e3050c8632 100644 --- a/sys/platform/pc32/apic/apic_vector.s +++ b/sys/platform/pc32/apic/apic_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.16 2004/02/12 06:57:46 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.17 2004/02/17 19:38:54 dillon Exp $ */ @@ -338,69 +338,6 @@ Xinvltlb: iret -#if 0 -#ifdef BETTER_CLOCK - -/* - * Executed by a CPU when it receives an Xcpucheckstate IPI from another CPU, - * - * - Stores current cpu state in checkstate_cpustate[cpuid] - * 0 == user, 1 == sys, 2 == intr - * - Stores current process in checkstate_curproc[cpuid] - * - * - Signals its receipt by setting bit cpuid in checkstate_probed_cpus. 
- * - * stack: 0->ds, 4->fs, 8->ebx, 12->eax, 16->eip, 20->cs, 24->eflags - */ - - .text - SUPERALIGN_TEXT - .globl Xcpucheckstate - .globl checkstate_cpustate - .globl checkstate_curproc - .globl checkstate_pc -Xcpucheckstate: - pushl %eax - pushl %ebx - pushl %ds /* save current data segment */ - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - movl $KPSEL, %eax - mov %ax, %fs - - movl $0, lapic_eoi /* End Of Interrupt to APIC */ - - movl $0, %ebx - movl 20(%esp), %eax - andl $3, %eax - cmpl $3, %eax - je 1f - testl $PSL_VM, 24(%esp) - jne 1f - incl %ebx /* system or interrupt */ -1: - movl PCPU(cpuid), %eax - movl %ebx, checkstate_cpustate(,%eax,4) - movl PCPU(curthread), %ebx - movl TD_PROC(%ebx),%ebx - movl %ebx, checkstate_curproc(,%eax,4) - movl 16(%esp), %ebx - movl %ebx, checkstate_pc(,%eax,4) - - lock /* checkstate_probed_cpus |= (1<gd_other_cpus && smp_started != 0) { + if (stopped_cpus != mycpu->gd_other_cpus) { db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", mycpu->gd_other_cpus, stopped_cpus); panic("stop_cpus() failed"); @@ -278,7 +278,7 @@ db_write_bytes(addr, size, data) } } - invltlb(); + cpu_invltlb(); } dst = (char *)addr; @@ -294,7 +294,7 @@ db_write_bytes(addr, size, data) if (ptep1) *ptep1 = oldmap1; - invltlb(); + cpu_invltlb(); } } diff --git a/sys/platform/pc32/i386/genassym.c b/sys/platform/pc32/i386/genassym.c index 89db328576..47e65b29f7 100644 --- a/sys/platform/pc32/i386/genassym.c +++ b/sys/platform/pc32/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.33 2003/12/20 05:52:25 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.34 2004/02/17 19:38:53 dillon Exp $ */ #include @@ -201,7 +201,6 @@ ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); ASSYM(GD_COMMON_TSSD, offsetof(struct 
mdglobaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); -ASSYM(GD_CPU_LOCKID, offsetof(struct mdglobaldata, gd_cpu_lockid)); ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, gd_other_cpus)); ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); ASSYM(GD_CMAP1, offsetof(struct mdglobaldata, gd_CMAP1)); diff --git a/sys/platform/pc32/i386/globals.s b/sys/platform/pc32/i386/globals.s index d10512b917..159410e6cd 100644 --- a/sys/platform/pc32/i386/globals.s +++ b/sys/platform/pc32/i386/globals.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/globals.s,v 1.19 2003/12/20 05:52:25 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/globals.s,v 1.20 2004/02/17 19:38:53 dillon Exp $ */ #include @@ -73,7 +73,7 @@ * The BSP version of these get setup in locore.s and pmap.c, while * the AP versions are setup in mp_machdep.c. 
*/ - .globl gd_cpuid, gd_cpu_lockid, gd_other_cpus + .globl gd_cpuid, gd_other_cpus .globl gd_ss_eflags, gd_intr_nesting_level .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 @@ -81,7 +81,6 @@ .set gd_cpuid,globaldata + GD_CPUID .set gd_private_tss,globaldata + GD_PRIVATE_TSS - .set gd_cpu_lockid,globaldata + GD_CPU_LOCKID .set gd_other_cpus,globaldata + GD_OTHER_CPUS .set gd_ss_eflags,globaldata + GD_SS_EFLAGS .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c index 49343b2845..94404fe6a4 100644 --- a/sys/platform/pc32/i386/machdep.c +++ b/sys/platform/pc32/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.53 2004/02/14 19:58:50 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.54 2004/02/17 19:38:53 dillon Exp $ */ #include "use_apm.h" @@ -1665,7 +1665,7 @@ physmap_done: * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; - invltlb(); + cpu_invltlb(); tmp = *(int *)ptr; /* @@ -1734,7 +1734,7 @@ physmap_done: } } *pte = 0; - invltlb(); + cpu_invltlb(); /* * XXX diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index 4f116faf63..12d6d3719e 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.21 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.22 2004/02/17 19:38:53 dillon Exp $ */ #include "opt_cpu.h" @@ -40,9 +40,6 @@ #include #include #include -#ifdef BETTER_CLOCK -#include -#endif #include /* cngetc() */ #include @@ -270,10 +267,6 @@ int cpu_num_to_apic_id[NAPICID]; int io_num_to_apic_id[NAPICID]; int apic_id_to_logical[NAPICID]; - -/* Bitmap of all available CPUs */ -u_int all_cpus; - /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; @@ -286,8 +279,6 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -int smp_started; /* has the system started? */ - /* * Local data and functions. */ @@ -295,6 +286,7 @@ int smp_started; /* has the system started? */ static int mp_capable; static u_int boot_address; static u_int base_memory; +static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ static int picmode; /* 0: virtual wire mode, 1: PIC mode */ static mpfps_t mpfps; @@ -309,9 +301,12 @@ static void fix_mp_table(void); static void setup_apic_irq_mapping(void); static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); -static int start_ap(int logicalCpu, u_int boot_addr); +static int start_ap(struct mdglobaldata *gd, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); + /* * Calculate usable address in base memory for AP trampoline code. 
*/ @@ -433,11 +428,14 @@ init_secondary(void) int x, myid = bootAP; u_int cr0; struct mdglobaldata *md; + struct privatespace *ps; - gdt_segs[GPRIV_SEL].ssd_base = (int) &CPU_prvspace[myid]; + ps = &CPU_prvspace[myid]; + + gdt_segs[GPRIV_SEL].ssd_base = (int)ps; gdt_segs[GPROC0_SEL].ssd_base = - (int) &CPU_prvspace[myid].mdglobaldata.gd_common_tss; - CPU_prvspace[myid].mdglobaldata.mi.gd_prvspace = &CPU_prvspace[myid]; + (int) &ps->mdglobaldata.gd_common_tss; + ps->mdglobaldata.mi.gd_prvspace = ps; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); @@ -455,7 +453,7 @@ init_secondary(void) gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - md = mdcpu; + md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); @@ -534,13 +532,13 @@ mp_enable(u_int boot_addr) /* turn on 4MB of V == P addressing so we can get to MP table */ *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME); - invltlb(); + cpu_invltlb(); /* examine the MP table for needed info, uses physical addresses */ x = mptable_pass2(); *(int *)PTD = 0; - invltlb(); + cpu_invltlb(); /* can't process default configs till the CPU APIC is pmapped */ if (x) @@ -572,14 +570,6 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#if 0 -#ifdef BETTER_CLOCK - /* install an inter-CPU IPI for reading processor state */ - setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif -#endif - /* install an inter-CPU IPI for IPIQ messaging */ setidt(XIPIQ_OFFSET, Xipiq, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -588,16 +578,6 @@ mp_enable(u_int boot_addr) setidt(XRENDEZVOUS_OFFSET, Xrendezvous, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#if 0 - /* install an inter-CPU IPI for 
forcing an additional software trap */ - setidt(XCPUAST_OFFSET, Xcpuast, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* install an inter-CPU IPI for interrupt forwarding */ - setidt(XFORWARD_IRQ_OFFSET, Xforward_irq, - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif - /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -1969,6 +1949,7 @@ start_all_aps(u_int boot_addr) u_char mpbiosreason; u_long mpbioswarmvec; struct mdglobaldata *gd; + struct privatespace *ps; char *stack; uintptr_t kptbase; @@ -1989,16 +1970,13 @@ start_all_aps(u_int boot_addr) mpbiosreason = inb(CMOS_DATA); #endif - /* record BSP in CPU map */ - all_cpus = 1; - /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ kptbase = (uintptr_t)(void *)KPTphys; for (x = 0; x < NKPT; x++) PTD[x] = (pd_entry_t)(PG_V | PG_RW | ((kptbase + x * PAGE_SIZE) & PG_FRAME)); - invltlb(); + cpu_invltlb(); /* start each AP */ for (x = 1; x <= mp_naps; ++x) { @@ -2028,20 +2006,19 @@ start_all_aps(u_int boot_addr) gd = &CPU_prvspace[x].mdglobaldata; /* official location */ bzero(gd, sizeof(*gd)); - gd->mi.gd_prvspace = &CPU_prvspace[x]; + gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; /* prime data page for it to use */ mi_gdinit(&gd->mi, x); cpu_gdinit(gd, x); - gd->gd_cpu_lockid = x << 24; gd->gd_CMAP1 = &SMPpt[pg + 1]; gd->gd_CMAP2 = &SMPpt[pg + 2]; gd->gd_CMAP3 = &SMPpt[pg + 3]; gd->gd_PMAP1 = &SMPpt[pg + 4]; - gd->gd_CADDR1 = CPU_prvspace[x].CPAGE1; - gd->gd_CADDR2 = CPU_prvspace[x].CPAGE2; - gd->gd_CADDR3 = CPU_prvspace[x].CPAGE3; - gd->gd_PADDR1 = (unsigned *)CPU_prvspace[x].PPAGE1; + gd->gd_CADDR1 = ps->CPAGE1; + gd->gd_CADDR2 = ps->CPAGE2; + gd->gd_CADDR3 = ps->CPAGE3; + gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; gd->mi.gd_ipiq = (void *)kmem_alloc(kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps 
+ 1)); @@ -2056,12 +2033,12 @@ start_all_aps(u_int boot_addr) /* * Setup the AP boot stack */ - bootSTK = &CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE/2]; + bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; bootAP = x; /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ - if (!start_ap(x, boot_addr)) { + if (!start_ap(gd, boot_addr)) { printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ @@ -2073,12 +2050,13 @@ start_all_aps(u_int boot_addr) /* record its version info */ cpu_apic_versions[x] = cpu_apic_versions[0]; - - all_cpus |= (1 << x); /* record AP in CPU map */ } + /* set ncpus to 1 + highest logical cpu. Not all may have come up */ + ncpus = x; + /* build our map of 'other' CPUs */ - mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); + mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); mycpu->gd_ipiq = (void *)kmem_alloc(kernel_map, sizeof(lwkt_ipiq) * ncpus); bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); @@ -2176,24 +2154,20 @@ install_ap_tramp(u_int boot_addr) * before the AP goes into the LWKT scheduler's idle loop. 
*/ static int -start_ap(int logical_cpu, u_int boot_addr) +start_ap(struct mdglobaldata *gd, u_int boot_addr) { int physical_cpu; int vector; - int cpus; u_long icr_lo, icr_hi; POSTCODE(START_AP_POST); /* get the PHYSICAL APIC ID# */ - physical_cpu = CPU_TO_ID(logical_cpu); + physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid); /* calculate the vector */ vector = (boot_addr >> 12) & 0xff; - /* used as a watchpoint to signal AP startup */ - cpus = ncpus; - /* Make sure the target cpu sees everything */ wbinvd(); @@ -2255,7 +2229,7 @@ start_ap(int logical_cpu, u_int boot_addr) /* wait for it to start, see ap_init() */ set_apic_timer(5000000);/* == 5 seconds */ while (read_apic_timer()) { - if (ncpus > cpus) + if (smp_startup_mask & (1 << gd->mi.gd_cpuid)) return 1; /* return SUCCESS */ } return 0; /* return FAILURE */ @@ -2263,16 +2237,21 @@ start_ap(int logical_cpu, u_int boot_addr) /* - * Flush the TLB on all other CPU's + * Lazy flush the TLB on all other CPU's. DEPRECATED. * - * XXX: Needs to handshake and wait for completion before proceding. + * If for some reason we were unable to start all cpus we cannot safely + * use broadcast IPIs. 
*/ void smp_invltlb(void) { #if defined(APIC_IO) - if (smp_started && invltlb_ok) + if (smp_startup_mask == smp_active_mask) { all_but_self_ipi(XINVLTLB_OFFSET); + } else { + selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET, + APIC_DELMODE_FIXED); + } #endif /* APIC_IO */ } @@ -2296,8 +2275,7 @@ smp_invltlb(void) int stop_cpus(u_int map) { - if (!smp_started) - return 0; + map &= smp_active_mask; /* send the Xcpustop IPI to all CPUs in map */ selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); @@ -2325,10 +2303,8 @@ stop_cpus(u_int map) int restart_cpus(u_int map) { - if (!smp_started) - return 0; - - started_cpus = map; /* signal other cpus to restart */ + /* signal other cpus to restart */ + started_cpus = map & smp_active_mask; while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */ /* spin */ ; @@ -2336,36 +2312,6 @@ restart_cpus(u_int map) return 1; } -int smp_active = 0; /* are the APs allowed to run? */ -SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); - -/* XXX maybe should be hw.ncpu */ -static int smp_cpus = 1; /* how many cpu's running */ -SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); - -int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ -SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); - -/* Warning: Do not staticize. Used from swtch.s */ -int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */ -SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, - &do_page_zero_idle, 0, ""); - -/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? 
*/ -int forward_irq_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW, - &forward_irq_enabled, 0, ""); - -/* Enable forwarding of a signal to a process running on a different CPU */ -static int forward_signal_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW, - &forward_signal_enabled, 0, ""); - -/* Enable forwarding of roundrobin to all other cpus */ -static int forward_roundrobin_enabled = 1; -SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, - &forward_roundrobin_enabled, 0, ""); - /* * This is called once the mpboot code has gotten us properly relocated * and the MMU turned on, etc. ap_init() is actually the idle thread, @@ -2380,11 +2326,16 @@ ap_init(void) u_int apic_id; /* - * Signal the BSP that we have started up successfully by incrementing - * ncpus. Note that we do not hold the BGL yet. The BSP is waiting - * for our signal. + * Adjust smp_startup_mask to signal the BSP that we have started + * up successfully. Note that we do not yet hold the BGL. The BSP + * is waiting for our signal. + * + * We can't set our bit in smp_active_mask yet because we are holding + * interrupts physically disabled and remote cpus could deadlock + * trying to send us an IPI. */ - ++ncpus; + smp_startup_mask |= 1 << mycpu->gd_cpuid; + cpu_mb1(); /* * Get the MP lock so we can finish initializing. Note: we are @@ -2397,13 +2348,14 @@ ap_init(void) /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); + smp_active_mask |= 1 << mycpu->gd_cpuid; #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Build our map of 'other' CPUs. 
*/ - mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); + mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); printf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid); @@ -2431,18 +2383,6 @@ ap_init(void) /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); - /* - * Since we have the BGL if smp_cpus matches ncpus then we are - * the last AP to get to this point and we can enable IPI's, - * tlb shootdowns, freezes, and so forth. - */ - ++smp_cpus; - if (smp_cpus == ncpus) { - invltlb_ok = 1; - smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ - smp_active = 1; /* historic */ - } - /* * AP helper function for kernel memory support. This will create * a memory reserve for the AP that is necessary to avoid certain @@ -2464,288 +2404,6 @@ ap_init(void) rel_mplock(); } -#ifdef BETTER_CLOCK - -#define CHECKSTATE_USER 0 -#define CHECKSTATE_SYS 1 -#define CHECKSTATE_INTR 2 - -/* Do not staticize. Used from apic_vector.s */ -struct thread *checkstate_curtd[MAXCPU]; -int checkstate_cpustate[MAXCPU]; -u_long checkstate_pc[MAXCPU]; - -#define PC_TO_INDEX(pc, prof) \ - ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ - (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) - -#if 0 -static void -addupc_intr_forwarded(struct proc *p, int id, int *astmap) -{ - int i; - struct uprof *prof; - u_long pc; - - pc = checkstate_pc[id]; - prof = &p->p_stats->p_prof; - if (pc >= prof->pr_off && - (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { - if ((p->p_flag & P_OWEUPC) == 0) { - prof->pr_addr = pc; - prof->pr_ticks = 1; - p->p_flag |= P_OWEUPC; - } - *astmap |= (1 << id); - } -} -#endif - -#if 0 -static void -forwarded_statclock(int id, int pscnt, int *astmap) -{ -#if 0 - struct pstats *pstats; - long rss; - struct rusage *ru; - struct vmspace *vm; - int cpustate; - struct thread *td; -#ifdef GPROF - register struct gmonparam *g; - int i; -#endif - - t = checkstate_curtd[id]; - cpustate = checkstate_cpustate[id]; - - switch (cpustate) { 
- case CHECKSTATE_USER: - if (td->td_proc && td->td_proc->p_flag & P_PROFIL) - addupc_intr_forwarded(td->td_proc, id, astmap); - if (pscnt > 1) - return; - p->p_uticks++; - if (p->p_nice > NZERO) - cp_time[CP_NICE]++; - else - cp_time[CP_USER]++; - break; - case CHECKSTATE_SYS: -#ifdef GPROF - /* - * Kernel statistics are just like addupc_intr, only easier. - */ - g = &_gmonparam; - if (g->state == GMON_PROF_ON) { - i = checkstate_pc[id] - g->lowpc; - if (i < g->textsize) { - i /= HISTFRACTION * sizeof(*g->kcount); - g->kcount[i]++; - } - } -#endif - if (pscnt > 1) - return; - - if (!p) - cp_time[CP_IDLE]++; - else { - p->p_sticks++; - cp_time[CP_SYS]++; - } - break; - case CHECKSTATE_INTR: - default: -#ifdef GPROF - /* - * Kernel statistics are just like addupc_intr, only easier. - */ - g = &_gmonparam; - if (g->state == GMON_PROF_ON) { - i = checkstate_pc[id] - g->lowpc; - if (i < g->textsize) { - i /= HISTFRACTION * sizeof(*g->kcount); - g->kcount[i]++; - } - } -#endif - if (pscnt > 1) - return; - if (p) - p->p_iticks++; - cp_time[CP_INTR]++; - } - if (p != NULL) { - schedclock(p); - - /* Update resource usage integrals and maximums. */ - if ((pstats = p->p_stats) != NULL && - (ru = &pstats->p_ru) != NULL && - (vm = p->p_vmspace) != NULL) { - ru->ru_ixrss += pgtok(vm->vm_tsize); - ru->ru_idrss += pgtok(vm->vm_dsize); - ru->ru_isrss += pgtok(vm->vm_ssize); - rss = pgtok(vmspace_resident_count(vm)); - if (ru->ru_maxrss < rss) - ru->ru_maxrss = rss; - } - } -#endif -} -#endif - -#if 0 -void -forward_statclock(int pscnt) -{ - int map; - int id; - int i; - - /* Kludge. We don't yet have separate locks for the interrupts - * and the kernel. This means that we cannot let the other processors - * handle complex interrupts while inhibiting them from entering - * the kernel in a non-interrupt context. 
- * - * What we can do, without changing the locking mechanisms yet, - * is letting the other processors handle a very simple interrupt - * (wich determines the processor states), and do the main - * work ourself. - */ - - if (!smp_started || !invltlb_ok || cold || panicstr) - return; - - printf("forward_statclock\n"); - /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ - - map = mycpu->gd_other_cpus & ~stopped_cpus ; - checkstate_probed_cpus = 0; - if (map != 0) - selected_apic_ipi(map, - XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); - - i = 0; - while (checkstate_probed_cpus != map) { - /* spin */ - i++; - if (i == 100000) { -#ifdef BETTER_CLOCK_DIAGNOSTIC - printf("forward_statclock: checkstate %x\n", - checkstate_probed_cpus); -#endif - break; - } - } - - /* - * Step 2: walk through other processors processes, update ticks and - * profiling info. - */ - - map = 0; - for (id = 0; id < ncpus; id++) { - if (id == mycpu->gd_cpuid) - continue; - if (((1 << id) & checkstate_probed_cpus) == 0) - continue; - forwarded_statclock(id, pscnt, &map); - } - if (map != 0) - resched_cpus(map); -} -#endif - -#if 0 -void -forward_hardclock(int pscnt) -{ - int map; - int id; -#if 0 - struct proc *p; - struct pstats *pstats; -#endif - int i; - - /* Kludge. We don't yet have separate locks for the interrupts - * and the kernel. This means that we cannot let the other processors - * handle complex interrupts while inhibiting them from entering - * the kernel in a non-interrupt context. - * - * What we can do, without changing the locking mechanisms yet, - * is letting the other processors handle a very simple interrupt - * (wich determines the processor states), and do the main - * work ourself. 
- */ - - if (!smp_started || !invltlb_ok || cold || panicstr) - return; - - /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ - - map = mycpu->gd_other_cpus & ~stopped_cpus ; - checkstate_probed_cpus = 0; - if (map != 0) - selected_apic_ipi(map, - XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); - - i = 0; - while (checkstate_probed_cpus != map) { - /* spin */ - i++; - if (i == 100000) { -#ifdef BETTER_CLOCK_DIAGNOSTIC - printf("forward_hardclock: checkstate %x\n", - checkstate_probed_cpus); -#endif - break; - } - } - - /* - * Step 2: walk through other processors processes, update virtual - * timer and profiling timer. If stathz == 0, also update ticks and - * profiling info. - */ - - map = 0; - for (id = 0; id < ncpus; id++) { - if (id == mycpu->gd_cpuid) - continue; - if (((1 << id) & checkstate_probed_cpus) == 0) - continue; - printf("forward_hardclock\n"); -#if 0 - p = checkstate_curproc[id]; - if (p) { - pstats = p->p_stats; - if (checkstate_cpustate[id] == CHECKSTATE_USER && - timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { - psignal(p, SIGVTALRM); - map |= (1 << id); - } - if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { - psignal(p, SIGPROF); - map |= (1 << id); - } - } - if (stathz == 0) { - forwarded_statclock( id, pscnt, &map); - } -#endif - } - if (map != 0) - resched_cpus(map); -} -#endif - -#endif /* BETTER_CLOCK */ - #ifdef APIC_INTR_REORDER /* * Maintain mapping from softintr vector to isr bit in local apic. @@ -2763,6 +2421,8 @@ set_lapic_isrloc(int intr, int vector) #endif /* + * XXX DEPRECATED + * * All-CPU rendezvous. 
CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), * rendezvous again, execute the teardown function (if specified), and then @@ -2816,8 +2476,17 @@ smp_rendezvous(void (* setup_func)(void *), smp_rv_waiters[0] = 0; smp_rv_waiters[1] = 0; - /* signal other processors, which will enter the IPI with interrupts off */ - all_but_self_ipi(XRENDEZVOUS_OFFSET); + /* + * Signal other processors which will enter the IPI with interrupts + * disabled. We cannot safely use broadcast IPIs if some of our + * cpus failed to start. + */ + if (smp_startup_mask == smp_active_mask) { + all_but_self_ipi(XRENDEZVOUS_OFFSET); + } else { + selected_apic_ipi(smp_active_mask, XRENDEZVOUS_OFFSET, + APIC_DELMODE_FIXED); + } /* call executor function */ smp_rendezvous_action(); @@ -2829,5 +2498,6 @@ smp_rendezvous(void (* setup_func)(void *), void cpu_send_ipiq(int dcpu) { - selected_apic_ipi(1 << dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); + if ((1 << dcpu) & smp_active_mask) + selected_apic_ipi(1 << dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); } diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index a762b550bd..1b24e6e729 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.29 2004/02/14 20:34:27 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.30 2004/02/17 19:38:53 dillon Exp $ */ /* @@ -105,6 +105,8 @@ #include #endif /* SMP || APIC_IO */ #include +#include +#include #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -135,8 +137,6 @@ #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) 
((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, @@ -196,11 +196,12 @@ static __inline void pmap_changebit (vm_page_t m, int bit, boolean_t setem); static void pmap_remove_all (vm_page_t m); static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); -static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, - vm_offset_t sva); -static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); +static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, + vm_offset_t sva, pmap_inval_info_t info); +static void pmap_remove_page (struct pmap *pmap, + vm_offset_t va, pmap_inval_info_t info); static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, - vm_offset_t va); + vm_offset_t va, pmap_inval_info_t info); static boolean_t pmap_testbit (vm_page_t m, int bit); static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); @@ -211,7 +212,7 @@ static int pmap_release_free_page (pmap_t pmap, vm_page_t p); static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex); static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); -static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); +static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static unsigned pdir4mb; @@ -439,7 +440,7 @@ pmap_bootstrap(firstaddr, loadaddr) */ PTD[KPTDI] = (pd_entry_t)ptditmp; kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)ptditmp; - invltlb(); + cpu_invltlb(); #endif } #endif @@ -463,7 +464,7 @@ pmap_bootstrap(firstaddr, loadaddr) gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1; - invltlb(); + cpu_invltlb(); } #ifdef SMP @@ -590,47 +591,6 @@ pmap_track_modified(vm_offset_t va) return 0; } -static PMAP_INLINE void -invltlb_1pg(vm_offset_t va) -{ -#if 
defined(I386_CPU) - if (cpu_class == CPUCLASS_386) { - invltlb(); - } else -#endif - { - invlpg(va); - } -} - -static __inline void -pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb_1pg(va); -#endif -} - -static __inline void -pmap_TLB_invalidate_all(pmap_t pmap) -{ -#if defined(SMP) - if (pmap->pm_active & (1 << mycpu->gd_cpuid)) - cpu_invltlb(); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - if (pmap->pm_active) - invltlb(); -#endif -} - static unsigned * get_ptbase(pmap_t pmap) { @@ -647,12 +607,8 @@ get_ptbase(pmap_t pmap) if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t)(frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } return (unsigned *) APTmap; } @@ -737,13 +693,15 @@ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { unsigned *pte; - unsigned npte, opte; + unsigned npte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); npte = pa | PG_RW | PG_V | pgeflag; pte = (unsigned *)vtopte(va); - opte = *pte; *pte = npte; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -753,10 +711,13 @@ PMAP_INLINE void pmap_kremove(vm_offset_t va) { unsigned *pte; + pmap_inval_info info; + pmap_inval_init(&info); + pmap_inval_add(&info, kernel_pmap, va); pte = (unsigned *)vtopte(va); *pte = 0; - invltlb_1pg(va); + pmap_inval_flush(&info); } /* @@ -798,16 +759,12 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count) pte = (unsigned *)vtopte(va); *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; m++; } #ifdef SMP - smp_invltlb(); + smp_invltlb(); /* XXX */ #endif } @@ -827,11 +784,7 @@ 
pmap_qremove(vm_offset_t va, int count) pte = (unsigned *)vtopte(va); *pte = 0; -#ifdef SMP cpu_invlpg((void *)va); -#else - invltlb_1pg(va); -#endif va += PAGE_SIZE; } #ifdef SMP @@ -965,8 +918,9 @@ pmap_swapin_proc(struct proc *p) * drops to zero, then it decrements the wire count. */ static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { + pmap_inval_flush(info); while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; @@ -975,6 +929,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) /* * unmap the page table page */ + pmap_inval_add(info, pmap, -1); pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == @@ -984,7 +939,6 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) * take effect immediately. */ pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_TLB_invalidate(pmap, pteva); } if (pmap->pm_ptphint == m) @@ -995,7 +949,6 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) */ --m->wire_count; if (m->wire_count == 0) { - vm_page_flash(m); vm_page_busy(m); vm_page_free_zero(m); @@ -1007,11 +960,11 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) } static PMAP_INLINE int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { vm_page_unhold(m); if (m->hold_count == 0) - return _pmap_unwire_pte_hold(pmap, m); + return _pmap_unwire_pte_hold(pmap, m, info); else return 0; } @@ -1021,7 +974,8 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) * conditionally free the page, and manage the hold/wire counts. 
*/ static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte, + pmap_inval_info_t info) { unsigned ptepindex; if (va >= UPT_MIN_ADDRESS) @@ -1033,12 +987,13 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { + pmap_inval_flush(info); mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } - return pmap_unwire_pte_hold(pmap, mpte); + return pmap_unwire_pte_hold(pmap, mpte, info); } void @@ -1255,7 +1210,8 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va) if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; - invltlb(); + cpu_invltlb(); + smp_invltlb(); } /* @@ -1501,9 +1457,9 @@ pmap_collect(void) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ - static int -pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) +pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va, pmap_inval_info_t info) { pv_entry_t pv; int rtval; @@ -1524,17 +1480,14 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) rtval = 0; if (pv) { - - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } - splx(s); return rtval; } @@ -1566,20 +1519,23 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) * pmap_remove_pte: do the things to unmap a page in a process */ static int -pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) +pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, + pmap_inval_info_t info) { unsigned oldpte; vm_page_t m; + pmap_inval_add(info, pmap, va); oldpte = 
loadandclear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support - * PG_G. + * PG_G. XXX PG_G is disabled for SMP so don't worry about + * the SMP case. */ if (oldpte & PG_G) - invlpg(va); + cpu_invlpg(va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); @@ -1596,9 +1552,9 @@ pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); - return pmap_remove_entry(pmap, m, va); + return pmap_remove_entry(pmap, m, va, info); } else { - return pmap_unuse_pt(pmap, va, NULL); + return pmap_unuse_pt(pmap, va, NULL, info); } return 0; @@ -1613,7 +1569,7 @@ pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va) * not kernel_pmap. */ static void -pmap_remove_page(struct pmap *pmap, vm_offset_t va) +pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) { unsigned *ptq; @@ -1624,14 +1580,13 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va) if (*pmap_pde(pmap, va) != 0) { ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - (void) pmap_remove_pte(pmap, ptq, va); - pmap_TLB_invalidate(pmap, va); + pmap_remove_pte(pmap, ptq, va, info); } } } /* - * pmap_remopve: + * pmap_remove: * * Remove the given range of addresses from the specified map. * @@ -1648,7 +1603,7 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; - int anyvalid; + struct pmap_inval_info info; if (pmap == NULL) return; @@ -1656,6 +1611,8 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) if (pmap->pm_stats.resident_count == 0) return; + pmap_inval_init(&info); + /* * special handling of removing one page. 
a very * common operation and easy to short circuit some @@ -1663,12 +1620,11 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) */ if (((sva + PAGE_SIZE) == eva) && (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { - pmap_remove_page(pmap, sva); + pmap_remove_page(pmap, sva, &info); + pmap_inval_flush(&info); return; } - anyvalid = 0; - /* * Get a local virtual address for the mappings that are being * worked with. @@ -1690,9 +1646,9 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid++; continue; } @@ -1712,22 +1668,16 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) pdnxt = eindex; } - for ( ;sindex != pdnxt; sindex++) { + for (; sindex != pdnxt; sindex++) { vm_offset_t va; - if (ptbase[sindex] == 0) { + if (ptbase[sindex] == 0) continue; - } va = i386_ptob(sindex); - - anyvalid++; - if (pmap_remove_pte(pmap, - ptbase + sindex, va)) + if (pmap_remove_pte(pmap, ptbase + sindex, va, &info)) break; } } - - if (anyvalid) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1742,8 +1692,9 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) static void pmap_remove_all(vm_page_t m) { - pv_entry_t pv; + struct pmap_inval_info info; unsigned *pte, tpte; + pv_entry_t pv; int s; #if defined(PMAP_DIAGNOSTIC) @@ -1756,11 +1707,13 @@ pmap_remove_all(vm_page_t m) } #endif + pmap_inval_init(&info); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = loadandclear(pte); if (tpte & PG_W) @@ -1783,18 +1736,16 @@ pmap_remove_all(vm_page_t m) if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } - 
pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - splx(s); + pmap_inval_flush(&info); } /* @@ -1812,7 +1763,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) unsigned *ptbase; vm_offset_t pdnxt, ptpaddr; vm_pindex_t sindex, eindex; - int anychanged; + pmap_inval_info info; if (pmap == NULL) return; @@ -1825,7 +1776,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + pmap_inval_init(&info); ptbase = get_ptbase(pmap); @@ -1840,9 +1791,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anychanged++; continue; } @@ -1862,6 +1813,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) unsigned pbits; vm_page_t m; + /* XXX this isn't optimal */ + pmap_inval_add(&info, pmap, i386_ptob(sindex)); pbits = ptbase[sindex]; if (pbits & PG_MANAGED) { @@ -1885,12 +1838,10 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (pbits != ptbase[sindex]) { ptbase[sindex] = pbits; - anychanged = 1; } } } - if (anychanged) - pmap_TLB_invalidate_all(pmap); + pmap_inval_flush(&info); } /* @@ -1914,6 +1865,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_paddr_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; + pmap_inval_info info; if (pmap == NULL) return; @@ -1934,26 +1886,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot, if (va < UPT_MIN_ADDRESS) { mpte = pmap_allocpte(pmap, va); } -#if 0 && defined(PMAP_DIAGNOSTIC) - else { - vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); - if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { - panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], origpte, va); - } - if (smp_active) { - pdeaddr = (vm_offset_t *) IdlePTDS[cpuid]; - if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { - if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) - printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); - printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr); - panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], newpte, origpte, va); - } - } - } -#endif + pmap_inval_init(&info); pte = pmap_pte(pmap, va); /* @@ -1965,6 +1899,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; + pmap_inval_add(&info, pmap, va); /* XXX non-optimal */ origpte = *(vm_offset_t *)pte; opa = origpte & PG_FRAME; @@ -2001,16 +1936,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { - if ((origpte & PG_RW) == 0) { + if ((origpte & PG_RW) == 0) *pte |= PG_RW; -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } + pmap_inval_flush(&info); return; } @@ -2034,7 +1962,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if (opa) { int err; - err = pmap_remove_pte(pmap, pte, va); + err = pmap_remove_pte(pmap, pte, va, &info); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } @@ -2076,16 +2004,8 @@ validate: */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; - /*if (origpte)*/ { -#ifdef SMP - cpu_invlpg((void *)va); - if (pmap->pm_active & 
mycpu->gd_other_cpus) - smp_invltlb(); -#else - invltlb_1pg(va); -#endif - } } + pmap_inval_flush(&info); } /* @@ -2104,6 +2024,9 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { unsigned *pte; vm_paddr_t pa; + pmap_inval_info info; + + pmap_inval_init(&info); /* * In the case that a page table page is not @@ -2160,7 +2083,7 @@ retry: pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) - pmap_unwire_pte_hold(pmap, mpte); + pmap_unwire_pte_hold(pmap, mpte, &info); return 0; } @@ -2273,7 +2196,8 @@ retry: ptepindex += 1; } vm_page_flag_set(p, PG_MAPPED); - invltlb(); + cpu_invltlb(); + smp_invltlb(); return; } @@ -2360,7 +2284,6 @@ retry: } } } - return; } /* @@ -2477,9 +2400,22 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) /* * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. + * invalidate TLB. However, in an SMP environment we must use + * a locked bus cycle to update the pte (if we are not using + * the pmap_inval_*() API that is)... it's ok to do this for simple + * wiring changes. 
*/ - pmap_pte_set_w(pte, wired); +#ifdef SMP + if (wired) + atomic_set_int(pte, PG_W); + else + atomic_clear_int(pte, PG_W); +#else + if (wired) + atomic_set_int_nonlocked(pte, PG_W); + else + atomic_clear_int_nonlocked(pte, PG_W); +#endif } @@ -2495,6 +2431,7 @@ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { + pmap_inval_info info; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; @@ -2512,13 +2449,12 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); -#if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); -#else - invltlb(); -#endif } + pmap_inval_init(&info); + pmap_inval_add(&info, dst_pmap, -1); + pmap_inval_add(&info, src_pmap, -1); for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; @@ -2588,7 +2524,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { - pmap_unwire_pte_hold(dst_pmap, dstmpte); + pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); } if (dstmpte->hold_count >= srcmpte->hold_count) break; @@ -2598,6 +2534,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, dst_pte++; } } + pmap_inval_flush(&info); } /* @@ -2803,6 +2740,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pv_entry_t pv, npv; int s; vm_page_t m; + pmap_inval_info info; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { @@ -2811,6 +2749,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } #endif + pmap_inval_init(&info); s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; @@ -2826,6 +2765,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) #else pte = 
pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; /* @@ -2861,11 +2801,11 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); free_pv_entry(pv); } + pmap_inval_flush(&info); splx(s); - pmap_TLB_invalidate_all(pmap); } /* @@ -2921,6 +2861,7 @@ pmap_testbit(vm_page_t m, int bit) static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { + struct pmap_inval_info info; pv_entry_t pv; unsigned *pte; int s; @@ -2928,6 +2869,7 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; + pmap_inval_init(&info); s = splvm(); /* @@ -2950,11 +2892,21 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) } #endif + /* + * Careful here. We can use a locked bus instruction to + * clear PG_A or PG_M safely but we need to synchronize + * with the target cpus when we mess with PG_RW. 
+ */ pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (bit == PG_RW) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); if (setem) { - *(int *)pte |= bit; - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); +#ifdef SMP + atomic_set_int(pte, bit); +#else + atomic_set_int_nonlocked(pte, bit); +#endif } else { vm_offset_t pbits = *(vm_offset_t *)pte; if (pbits & bit) { @@ -2962,14 +2914,22 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) if (pbits & PG_M) { vm_page_dirty(m); } - *(int *)pte = pbits & ~(PG_M|PG_RW); +#ifdef SMP + atomic_clear_int(pte, PG_M|PG_RW); +#else + atomic_clear_int_nonlocked(pte, PG_M|PG_RW); +#endif } else { - *(int *)pte = pbits & ~bit; +#ifdef SMP + atomic_clear_int(pte, bit); +#else + atomic_clear_int_nonlocked(pte, bit); +#endif } - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } } } + pmap_inval_flush(&info); splx(s); } @@ -3038,10 +2998,11 @@ pmap_ts_referenced(vm_page_t m) pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { - *pte &= ~PG_A; - - pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); - +#ifdef SMP + atomic_clear_int(pte, PG_A); +#else + atomic_clear_int_nonlocked(pte, PG_A); +#endif rtval++; if (rtval > 4) { break; @@ -3148,7 +3109,8 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size) tmpva += PAGE_SIZE; pa += PAGE_SIZE; } - invltlb(); + cpu_invltlb(); + smp_invltlb(); return ((void *)(va + offset)); } diff --git a/sys/platform/pc32/i386/pmap_inval.c b/sys/platform/pc32/i386/pmap_inval.c new file mode 100644 index 0000000000..74da0d7c47 --- /dev/null +++ b/sys/platform/pc32/i386/pmap_inval.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/pc32/i386/pmap_inval.c,v 1.1 2004/02/17 19:38:53 dillon Exp $ + */ + +/* + * pmap invalidation support code. Certain hardware requirements must + * be dealt with when manipulating page table entries and page directory + * entries within a pmap. In particular, we cannot safely manipulate + * page tables which are in active use by another cpu (even if it is + * running in userland) for two reasons: First, TLB writebacks will + * race against our own modifications and tests. Second, even if we + * were to use bus-locked instruction we can still screw up the + * target cpu's instruction pipeline due to Intel cpu errata. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#if defined(SMP) || defined(APIC_IO) +#include +#include +#endif /* SMP || APIC_IO */ +#include +#include +#include + +#ifdef SMP + +static void +_cpu_invltlb(void *dummy) +{ + cpu_invltlb(); +} + +static void +_cpu_invl1pg(void *data) +{ + cpu_invlpg(data); +} + +#endif + +/* + * Initialize for add or flush + */ +void +pmap_inval_init(pmap_inval_info_t info) +{ + info->pir_flags = 0; +} + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. + */ +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ +#ifdef SMP + if ((info->pir_flags & PIRF_CPUSYNC) == 0) { + info->pir_flags |= PIRF_CPUSYNC; + info->pir_cpusync.cs_run_func = NULL; + info->pir_cpusync.cs_fin1_func = NULL; + info->pir_cpusync.cs_fin2_func = NULL; + lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync); + } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { + lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); + } +#else + if (pmap->pm_active == 0) + return; +#endif + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +/* + * Synchronize changes with target cpus. 
+ */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ +#ifdef SMP + if (info->pir_flags & PIRF_CPUSYNC) + lwkt_cpusync_finish(&info->pir_cpusync); +#else + if (info->pir_flags & PIRF_INVLTLB) + cpu_invltlb(); + else if (info->pir_flags & PIRF_INVL1PG) + cpu_invlpg(info->pir_cpusync.cs_data); +#endif + info->pir_flags = 0; +} + diff --git a/sys/platform/pc32/i386/vm_machdep.c b/sys/platform/pc32/i386/vm_machdep.c index 37dfaf35fd..5bd8c46abe 100644 --- a/sys/platform/pc32/i386/vm_machdep.c +++ b/sys/platform/pc32/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.26 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.27 2004/02/17 19:38:53 dillon Exp $ */ #include "use_npx.h" @@ -417,16 +417,15 @@ void cpu_reset() { #ifdef SMP - if (smp_active == 0) { + if (smp_active_mask == 1) { cpu_reset_real(); /* NOTREACHED */ } else { - u_int map; + cpumask_t map; int cnt; printf("cpu_reset called on cpu#%d\n",mycpu->gd_cpuid); - map = mycpu->gd_other_cpus & ~ stopped_cpus; + map = mycpu->gd_other_cpus & ~stopped_cpus & smp_active_mask; if (map != 0) { printf("cpu_reset: Stopping other CPUs\n"); @@ -502,7 +501,7 @@ cpu_reset_real() bzero((caddr_t) PTD, PAGE_SIZE); /* "good night, sweet prince .... " */ - invltlb(); + cpu_invltlb(); /* NOTREACHED */ while(1); } diff --git a/sys/platform/pc32/include/globaldata.h b/sys/platform/pc32/include/globaldata.h index e2826d0133..75db0086b2 100644 --- a/sys/platform/pc32/include/globaldata.h +++ b/sys/platform/pc32/include/globaldata.h @@ -28,7 +28,7 @@ * should not include this file.
* * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/include/globaldata.h,v 1.21 2003/12/20 05:52:27 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/globaldata.h,v 1.22 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_GLOBALDATA_H_ @@ -72,7 +72,7 @@ struct mdglobaldata { int gd_idelayed; /* delayed software ints */ int gd_currentldt; int gd_private_tss; - u_int gd_cpu_lockid; + u_int unused001; u_int gd_other_cpus; u_int gd_ss_eflags; pt_entry_t *gd_CMAP1; diff --git a/sys/platform/pc32/include/mpapic.h b/sys/platform/pc32/include/mpapic.h index f979f28bad..2ebeec4548 100644 --- a/sys/platform/pc32/include/mpapic.h +++ b/sys/platform/pc32/include/mpapic.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/mpapic.h,v 1.14.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/platform/pc32/include/Attic/mpapic.h,v 1.4 2003/08/07 21:17:22 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/Attic/mpapic.h,v 1.5 2004/02/17 19:38:53 dillon Exp $ */ #ifndef _MACHINE_MPAPIC_H_ @@ -74,33 +74,15 @@ selected_procs_ipi(int targetMap, int vector) return selected_apic_ipi(targetMap, vector, APIC_DELMODE_FIXED); } -/* - * send an IPI INTerrupt containing 'vector' to all CPUs, including myself - */ -static __inline int -all_procs_ipi(int vector) -{ - return apic_ipi(APIC_DEST_ALLISELF, vector, APIC_DELMODE_FIXED); -} - /* * send an IPI INTerrupt containing 'vector' to all CPUs EXCEPT myself */ static __inline int all_but_self_ipi(int vector) { - if (ncpus <= 1) + if (smp_active_mask == 1) return 0; return apic_ipi(APIC_DEST_ALLESELF, vector, APIC_DELMODE_FIXED); } -/* - * send an IPI INTerrupt containing 'vector' to myself - */ -static __inline int -self_ipi(int vector) -{ - return apic_ipi(APIC_DEST_SELF, vector, APIC_DELMODE_FIXED); -} - #endif /* _MACHINE_MPAPIC_H */ diff --git a/sys/platform/pc32/include/pmap_inval.h b/sys/platform/pc32/include/pmap_inval.h 
new file mode 100644 index 0000000000..1762cf4442 --- /dev/null +++ b/sys/platform/pc32/include/pmap_inval.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003 Matthew Dillon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/platform/pc32/include/pmap_inval.h,v 1.1 2004/02/17 19:38:54 dillon Exp $ + */ + +#ifndef _MACHINE_PMAP_INVAL_H_ +#define _MACHINE_PMAP_INVAL_H_ + +typedef struct pmap_inval_info { + int pir_flags; + struct lwkt_cpusync pir_cpusync; +} pmap_inval_info; + +typedef pmap_inval_info *pmap_inval_info_t; + +#define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ +#define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ +#define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ + +#ifdef _KERNEL + +void pmap_inval_init(pmap_inval_info_t); +void pmap_inval_add(pmap_inval_info_t, pmap_t, vm_offset_t); +void pmap_inval_flush(pmap_inval_info_t); + +#endif + +#endif diff --git a/sys/platform/pc32/include/smp.h b/sys/platform/pc32/include/smp.h index 63d85cdb26..46d3eb7394 100644 --- a/sys/platform/pc32/include/smp.h +++ b/sys/platform/pc32/include/smp.h @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.8 2003/11/03 02:08:33 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.9 2004/02/17 19:38:54 dillon Exp $ * */ @@ -81,7 +81,6 @@ struct apic_intmapinfo { int redirindex; }; extern struct apic_intmapinfo int_to_apicintpin[]; -extern u_int all_cpus; extern struct pcb stoppcbs[]; /* functions in mp_machdep.c */ @@ -109,10 +108,6 @@ void init_secondary (void); int stop_cpus (u_int); void ap_init (void); int restart_cpus (u_int); -#ifdef BETTER_CLOCK -void forward_statclock (int pscnt); -void forward_hardclock (int pscnt); -#endif /* BETTER_CLOCK */ void forward_signal (struct proc *); void forward_roundrobin (void); #ifdef APIC_INTR_REORDER @@ -151,15 +146,12 @@ void u_sleep (int); void cpu_send_ipiq (int); /* global data in init_smp.c */ -extern int invltlb_ok; -extern int smp_active; -extern int smp_started; -extern 
volatile int smp_idle_loops; +extern cpumask_t smp_active_mask; #endif /* !LOCORE */ #else /* !SMP && !APIC_IO */ -#define smp_active 0 /* smp_active always 0 on UP machines */ +#define smp_active_mask 1 /* smp_active_mask always 1 on UP machines */ #endif diff --git a/sys/platform/pc32/include/smptests.h b/sys/platform/pc32/include/smptests.h index add6132e31..6fc8ab49b2 100644 --- a/sys/platform/pc32/include/smptests.h +++ b/sys/platform/pc32/include/smptests.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/smptests.h,v 1.33.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/include/Attic/smptests.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/Attic/smptests.h,v 1.4 2004/02/17 19:38:54 dillon Exp $ */ #ifndef _MACHINE_SMPTESTS_H_ @@ -34,28 +34,6 @@ * Various 'tests in progress' and configuration parameters. */ - -/* - * Tor's clock improvements. - * - * When the giant kernel lock disappears, a different strategy should - * probably be used, thus this patch can only be considered a temporary - * measure. - * - * This patch causes (NCPU-1)*(128+100) extra IPIs per second. - * During profiling, the number is (NCPU-1)*(1024+100) extra IPIs/s - * in addition to extra IPIs due to forwarding ASTs to other CPUs. - * - * Having a shared AST flag in an SMP configuration is wrong, and I've - * just kludged around it, based upon the kernel lock blocking other - * processors from entering the kernel while handling an AST for one - * processor. When the giant kernel lock disappers, this kludge breaks. - * - * -- Tor - */ -#define BETTER_CLOCK - - /* * Control the "giant lock" pushdown by logical steps. 
*/ diff --git a/sys/platform/pc32/isa/apic_vector.s b/sys/platform/pc32/isa/apic_vector.s index af1fdc33fd..c1cece5da8 100644 --- a/sys/platform/pc32/isa/apic_vector.s +++ b/sys/platform/pc32/isa/apic_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_vector.s,v 1.16 2004/02/12 06:57:46 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_vector.s,v 1.17 2004/02/17 19:38:54 dillon Exp $ */ @@ -338,69 +338,6 @@ Xinvltlb: iret -#if 0 -#ifdef BETTER_CLOCK - -/* - * Executed by a CPU when it receives an Xcpucheckstate IPI from another CPU, - * - * - Stores current cpu state in checkstate_cpustate[cpuid] - * 0 == user, 1 == sys, 2 == intr - * - Stores current process in checkstate_curproc[cpuid] - * - * - Signals its receipt by setting bit cpuid in checkstate_probed_cpus. - * - * stack: 0->ds, 4->fs, 8->ebx, 12->eax, 16->eip, 20->cs, 24->eflags - */ - - .text - SUPERALIGN_TEXT - .globl Xcpucheckstate - .globl checkstate_cpustate - .globl checkstate_curproc - .globl checkstate_pc -Xcpucheckstate: - pushl %eax - pushl %ebx - pushl %ds /* save current data segment */ - pushl %fs - - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - movl $KPSEL, %eax - mov %ax, %fs - - movl $0, lapic_eoi /* End Of Interrupt to APIC */ - - movl $0, %ebx - movl 20(%esp), %eax - andl $3, %eax - cmpl $3, %eax - je 1f - testl $PSL_VM, 24(%esp) - jne 1f - incl %ebx /* system or interrupt */ -1: - movl PCPU(cpuid), %eax - movl %ebx, checkstate_cpustate(,%eax,4) - movl PCPU(curthread), %ebx - movl TD_PROC(%ebx),%ebx - movl %ebx, checkstate_curproc(,%eax,4) - movl 16(%esp), %ebx - movl %ebx, checkstate_pc(,%eax,4) - - lock /* checkstate_probed_cpus |= (1< @@ -201,7 +201,6 @@ ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); ASSYM(GD_COMMON_TSSD, offsetof(struct mdglobaldata, gd_common_tssd)); 
ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); -ASSYM(GD_CPU_LOCKID, offsetof(struct mdglobaldata, gd_cpu_lockid)); ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, gd_other_cpus)); ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); ASSYM(GD_CMAP1, offsetof(struct mdglobaldata, gd_CMAP1)); diff --git a/sys/sys/thread.h b/sys/sys/thread.h index 550736e745..ad9338ff6f 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -7,7 +7,7 @@ * Types which must already be defined when this header is included by * userland: struct md_thread * - * $DragonFly: src/sys/sys/thread.h,v 1.45 2004/02/15 05:15:27 dillon Exp $ + * $DragonFly: src/sys/sys/thread.h,v 1.46 2004/02/17 19:38:50 dillon Exp $ */ #ifndef _SYS_THREAD_H_ @@ -199,8 +199,10 @@ struct thread { int td_nest_count; /* prevent splz nesting */ #ifdef SMP int td_mpcount; /* MP lock held (count) */ + int td_cscount; /* cpu synchronization master */ #else int td_unused001; + int td_unused002; #endif char td_comm[MAXCOMLEN+1]; /* typ 16+1 bytes */ struct thread *td_preempted; /* we preempted this thread */ -- 2.41.0