rtld-elf - Notify thread state to optimize relocations
author Matthew Dillon <dillon@apollo.backplane.com>
Sun, 12 May 2019 04:01:55 +0000 (21:01 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sun, 12 May 2019 04:07:40 +0000 (21:07 -0700)
* Add shims to allow libthread_xu to notify rtld when threading
  is being used.

* Requires weak symbols in libc which are overridden by rtld-elf.

* Implement the feature in rtld-elf and use it to avoid making calls
  to lwp_gettid().  When threaded, use tls_get_tcb() (which does not
  require a system call) instead of lwp_gettid().  When not threaded,
  just use a constant.

  NOTE: We cannot use tls_get_tcb() unconditionally because the tcb
  is not set up during early relocations.  So do this whack-a-mole
  to make it work.

* This leaves just the sigprocmask wrappers around rtld-elf (which
  are needed to prevent stacked relocations from signal handlers).

Poked-by: mjg
lib/libc/gen/Symbol.map
lib/libc/gen/dlfcn.c
lib/libthread_xu/thread/thr_kern.c
lib/libthread_xu/thread/thr_private.h
libexec/rtld-elf/Symbol.map
libexec/rtld-elf/rtld.c
libexec/rtld-elf/rtld_lock.c

index 05cec48..fdeec56 100644 (file)
@@ -693,6 +693,7 @@ DFprivate_1.0 {
     _rtld_thread_prefork;
     _rtld_thread_postfork;
     _rtld_thread_childfork;
+    _rtld_setthreaded;
     _thread_finalize;
     _seekdir;
     _setcontext;
index 0080c1e..28c1ece 100644 (file)
@@ -38,6 +38,7 @@ void _rtld_thread_childfork(void);
 void _rtld_thread_init(void *);
 void _rtld_thread_postfork(void);
 void _rtld_thread_prefork(void);
+void _rtld_setthreaded(void);
 
 extern char **environ;
 
@@ -226,3 +227,10 @@ void
 _rtld_thread_childfork(void)
 {
 }
+
+#pragma weak _rtld_setthreaded
+int write(int, void *, size_t);
+void
+_rtld_setthreaded(void)
+{
+}
index c15c2a3..410af80 100644 (file)
@@ -51,6 +51,7 @@ _thr_setthreaded(int threaded)
        if (((threaded == 0) ^ (__isthreaded == 0)) == 0)
                return (0);
        __isthreaded = threaded;
+       _rtld_setthreaded(threaded);
 #if 0
        /* save for later. */
        if (threaded != 0)
index 97b2733..bc08a7a 100644 (file)
@@ -678,6 +678,7 @@ void        _thr_ref_delete(struct pthread *, struct pthread *);
 void   _thr_ref_delete_unlocked(struct pthread *, struct pthread *);
 int    _thr_find_thread(struct pthread *, struct pthread *, int);
 void   _thr_malloc_init(void);
+void   _rtld_setthreaded(int);
 void   _thr_rtld_init(void);
 void   _thr_rtld_fini(void);
 int    _thr_stack_alloc(struct pthread_attr *);
index 3490faf..5412719 100644 (file)
@@ -26,6 +26,7 @@ DFprivate_1.0 {
     _rtld_thread_prefork;
     _rtld_thread_postfork;
     _rtld_thread_childfork;
+    _rtld_setthreaded;
     _rtld_addr_phdr;
     _rtld_get_stack_prot;
     _r_debug_postinit;
index 9d59961..4eefb12 100644 (file)
@@ -4585,6 +4585,7 @@ _rtld_allocate_tls(void)
     wlock_acquire(rtld_bind_lock, &lockstate);
     new_tcb = allocate_tls(obj_list);
     lock_release(rtld_bind_lock, &lockstate);
+
     return (new_tcb);
 }
 
index 124b45a..d6165b1 100644 (file)
@@ -50,6 +50,9 @@
 #include <stdio.h>
 #include <sys/file.h>
 
+#include <machine/sysarch.h>
+#include <machine/tls.h>
+
 #include "debug.h"
 #include "rtld.h"
 #include "rtld_machdep.h"
 extern pid_t __sys_getpid(void);
 
 #define WAFLAG         0x1     /* A writer holds the lock */
-#define RC_INCR                0x2     /* Adjusts count of readers desiring lock */
+#define SLFLAG         0x2     /* Sleep pending on lock */
+#define RC_INCR                0x4     /* Adjusts count of readers desiring lock */
 
 struct Struct_Lock {
        volatile u_int lock;
-       int tid;                /* owner (exclusive) */
        int count;              /* recursion (exclusive) */
+       void *owner;            /* owner (exclusive) - tls_get_tcb() */
        sigset_t savesigmask;   /* first exclusive owner sets mask */
 } __cachealign;
 
@@ -78,11 +82,31 @@ rtld_lock_t rtld_phdr_lock = &phdr_lock;
 rtld_lock_t    rtld_bind_lock = &bind_lock;
 rtld_lock_t    rtld_libc_lock = &libc_lock;
 
+static int _rtld_isthreaded;
+
+void _rtld_setthreaded(int threaded);
+
+void
+_rtld_setthreaded(int threaded)
+{
+       _rtld_isthreaded = threaded;
+}
+
+static __inline
+void *
+myid(void)
+{
+       if (_rtld_isthreaded) {
+               return(tls_get_tcb());
+       }
+       return (void *)(intptr_t)1;
+}
+
 void
 rlock_acquire(rtld_lock_t lock, RtldLockState *state)
 {
+       void *tid = myid();
        int v;
-       int tid = 0;
 
        v = lock->lock;
        cpu_ccfence();
@@ -93,39 +117,46 @@ rlock_acquire(rtld_lock_t lock, RtldLockState *state)
                                break;
                        }
                } else {
-                       if (tid == 0)
-                               tid = lwp_gettid();
-                       if (lock->tid == tid) {
+                       if (lock->owner == tid) {
                                ++lock->count;
                                state->lockstate = RTLD_LOCK_WLOCKED;
                                break;
                        }
-                       umtx_sleep(&lock->lock, v, 0);
-                       v = lock->lock;
-                       cpu_ccfence();
+                       if (atomic_fcmpset_int(&lock->lock, &v, v | SLFLAG)) {
+                               umtx_sleep(&lock->lock, v, 0);
+                       }
                }
+               cpu_ccfence();
        }
 }
 
 void
 wlock_acquire(rtld_lock_t lock, RtldLockState *state)
 {
+       void *tid = myid();
        sigset_t tmp_oldsigmask;
-       int tid = lwp_gettid();
+       int v;
 
-       if (lock->tid == tid) {
+       if (lock->owner == tid) {
                ++lock->count;
                state->lockstate = RTLD_LOCK_WLOCKED;
                return;
        }
 
        sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
+       v = lock->lock;
        for (;;) {
-               if (atomic_cmpset_acq_int(&lock->lock, 0, WAFLAG))
-                       break;
-               umtx_sleep(&lock->lock, 0, 0);
+               if ((v & ~SLFLAG) == 0) {
+                       if (atomic_fcmpset_int(&lock->lock, &v, WAFLAG))
+                               break;
+               } else {
+                       if (atomic_fcmpset_int(&lock->lock, &v, v | SLFLAG)) {
+                               umtx_sleep(&lock->lock, v, 0);
+                       }
+               }
+               cpu_ccfence();
        }
-       lock->tid = tid;
+       lock->owner = tid;
        lock->count = 1;
        lock->savesigmask = tmp_oldsigmask;
        state->lockstate = RTLD_LOCK_WLOCKED;
@@ -141,14 +172,18 @@ lock_release(rtld_lock_t lock, RtldLockState *state)
                return;
        if ((lock->lock & WAFLAG) == 0) {
                v = atomic_fetchadd_int(&lock->lock, -RC_INCR) - RC_INCR;
-               if (v == 0)
+               if (v == SLFLAG) {
+                       atomic_clear_int(&lock->lock, SLFLAG);
                        umtx_wakeup(&lock->lock, 0);
+               }
        } else if (--lock->count == 0) {
                tmp_oldsigmask = lock->savesigmask;
-               lock->tid = 0;
+               lock->owner = NULL;
                v = atomic_fetchadd_int(&lock->lock, -WAFLAG) - WAFLAG;
-               if (v == 0)
+               if (v == SLFLAG) {
+                       atomic_clear_int(&lock->lock, SLFLAG);
                        umtx_wakeup(&lock->lock, 0);
+               }
                sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
        }
        state->lockstate = RTLD_LOCK_UNLOCKED;