kernel - Clean up spinlock code, add more lwkt_yield()s
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 28 Oct 2011 16:29:28 +0000 (09:29 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 28 Oct 2011 16:29:28 +0000 (09:29 -0700)
* Clean up some of the critical path in the spin_unlock() API

* Add a few more lwkt_yield()s in the buffer cache and vm_object cleaning
  code.

sys/kern/kern_spinlock.c
sys/kern/vfs_bio.c
sys/platform/pc64/x86_64/pmap.c
sys/sys/spinlock2.h
sys/vm/vm_object.c
sys/vm/vnode_pager.c

index a736eed..d836833 100644 (file)
@@ -149,17 +149,17 @@ spin_trylock_contested(struct spinlock *spin)
  * (for intel/amd anyhow) is not strictly needed as cache bus resource use
  * is governed by the lazy update.
  *
- * WARNING!!!!  Performance matters here, by a huge margin.  There are still
- *             a few bottlenecks in the kernel (e.g. the PQ_INACTIVE
- *             vm_page_queue) where things like parallel compiles hit up
- *             against full all-cores contention right here.
+ * WARNING!!!!  Performance matters here, by a huge margin.
  *
- *             48-core test with pre-read / -j 48 no-modules kernel compile
- *             came in at 75 seconds.  Without pre-read it came in at 170 seconds.
+ *     48-core test with pre-read / -j 48 no-modules kernel compile
+ *     with fanned-out inactive and active queues came in at 55 seconds.
  *
- *             4-core test with pre-read / -j 48 no-modules kernel compile
- *             came in at 83 seconds.  Without pre-read it came in at 83 seconds
- *             as well (no difference).
+ *     48-core test with pre-read / -j 48 no-modules kernel compile
+ *     came in at 75 seconds.  Without pre-read it came in at 170 seconds.
+ *
+ *     4-core test with pre-read / -j 48 no-modules kernel compile
+ *     came in at 83 seconds.  Without pre-read it came in at 83 seconds
+ *     as well (no difference).
  */
 void
 spin_lock_contested(struct spinlock *spin)
index 0e133b7..2c82b0e 100644 (file)
@@ -2694,6 +2694,7 @@ flushbufqueues(bufq_type_t q)
                }
 
                spin_unlock(&bufqspin);
+               lwkt_yield();
                spun = 0;
 
                if (LIST_FIRST(&bp->b_dep) != NULL &&
index ffda71f..2bed171 100644 (file)
@@ -2739,6 +2739,7 @@ kernel_skip:
                         *       the potentially conflicting pv and
                         *       re-checking.
                         */
+                       lwkt_yield();
                        if (*ptep == 0) {
                                pte_pv = pv_find(pmap, pmap_pte_pindex(sva));
                                if (pte_pv == NULL) {
@@ -3349,6 +3350,7 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
                                 info->addr + x86_64_ptob(rel_index), p);
        }
        vm_page_wakeup(p);
+       lwkt_yield();
        return(0);
 }
 
index 85811d5..f4ad337 100644 (file)
@@ -172,18 +172,24 @@ spin_unlock_quick(globaldata_t gd, struct spinlock *spin)
        }
 #endif
        /*
-        * Don't use a locked instruction here.
+        * Don't use a locked instruction here.  To reduce latency we avoid
+        * reading spin->counta prior to writing to it.
         */
+#ifdef DEBUG_LOCKS
        KKASSERT(spin->counta != 0);
+#endif
        cpu_sfence();
        spin->counta = 0;
        cpu_sfence();
 #endif
+#ifdef DEBUG_LOCKS
        KKASSERT(gd->gd_spinlocks_wr > 0);
+#endif
        --gd->gd_spinlocks_wr;
        cpu_ccfence();
        --gd->gd_curthread->td_critcount;
 #if 0
+       /* FUTURE */
        if (__predict_false(gd->gd_reqflags & RQF_IDLECHECK_MASK))
                lwkt_maybe_splz(gd->gd_curthread);
 #endif
index 7487d6f..cc2fd00 100644 (file)
@@ -1013,6 +1013,7 @@ vm_object_page_clean_pass1(struct vm_page *p, void *data)
        } else {
                info->error = 1;
        }
+       lwkt_yield();
        return(0);
 }
 
@@ -1031,7 +1032,7 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
         * the cleaning pass.
         */
        if ((p->flags & PG_CLEANCHK) == 0)
-               return(0);
+               goto done;
 
        generation = info->object->generation;
        vm_page_busy_wait(p, TRUE, "vpcwai");
@@ -1039,7 +1040,7 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
            info->object->generation != generation) {
                info->error = 1;
                vm_page_wakeup(p);
-               return(0);
+               goto done;
        }
 
        /*
@@ -1049,7 +1050,7 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
        if (p->valid == 0 || (p->queue - p->pc) == PQ_CACHE) {
                KKASSERT((p->dirty & p->valid) == 0);
                vm_page_wakeup(p);
-               return(0);
+               goto done;
        }
 
        /*
@@ -1061,7 +1062,7 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
        if ((p->dirty & p->valid) == 0) {
                vm_page_flag_clear(p, PG_CLEANCHK);
                vm_page_wakeup(p);
-               return(0);
+               goto done;
        }
 
        /*
@@ -1073,7 +1074,7 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
        if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
                vm_page_flag_clear(p, PG_CLEANCHK);
                vm_page_wakeup(p);
-               return(0);
+               goto done;
        }
 
        /*
@@ -1082,6 +1083,8 @@ vm_object_page_clean_pass2(struct vm_page *p, void *data)
         * we raced an object modification.
         */
        vm_object_page_collect_flush(info->object, p, info->pagerflags);
+done:
+       lwkt_yield();
        return(0);
 }
 
index 9a589d4..040f4d6 100644 (file)
@@ -661,7 +661,9 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *mpp, int bytecount,
                        }
                } else if (mt->valid == 0) {
                        if (error == 0) {
-                               kprintf("page failed but no I/O error page %p object %p pindex %d\n", mt, mt->object, (int) mt->pindex);
+                               kprintf("page failed but no I/O error page "
+                                       "%p object %p pindex %d\n",
+                                       mt, mt->object, (int) mt->pindex);
                                /* whoops, something happened */
                                error = EINVAL;
                        }