From 50caca1a70a5f1f9dfba401f114bed0e721bbb0f Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 11 May 2019 21:01:55 -0700 Subject: [PATCH] rtld-elf - Notify thread state to optimize relocations * Add shims to allow libthread_xu to notify rtld when threading is being used. * Requires weak symbols in libc which are overridden by rtld-elf. * Implement the feature in rtld-elf and use it to avoid making calls to lwp_gettid(). When threaded, use tls_get_tcb() (which does not require a system call) instead of lwp_gettid(). When not threaded, just use a constant. NOTE: We cannot use tls_get_tcb() unconditionally because the tcb is not set up during early relocations. So do this whack-a-mole to make it work. * This leaves just the sigprocmask wrappers around rtld-elf (which are needed to prevent stacked relocations from signal handlers). Poked-by: mjg --- lib/libc/gen/Symbol.map | 1 + lib/libc/gen/dlfcn.c | 8 +++ lib/libthread_xu/thread/thr_kern.c | 1 + lib/libthread_xu/thread/thr_private.h | 1 + libexec/rtld-elf/Symbol.map | 1 + libexec/rtld-elf/rtld.c | 1 + libexec/rtld-elf/rtld_lock.c | 71 ++++++++++++++++++++------- 7 files changed, 66 insertions(+), 18 deletions(-) diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 05cec48eeb..fdeec56f2c 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -693,6 +693,7 @@ DFprivate_1.0 { _rtld_thread_prefork; _rtld_thread_postfork; _rtld_thread_childfork; + _rtld_setthreaded; _thread_finalize; _seekdir; _setcontext; diff --git a/lib/libc/gen/dlfcn.c b/lib/libc/gen/dlfcn.c index 0080c1e32c..28c1ecef60 100644 --- a/lib/libc/gen/dlfcn.c +++ b/lib/libc/gen/dlfcn.c @@ -38,6 +38,7 @@ void _rtld_thread_childfork(void); void _rtld_thread_init(void *); void _rtld_thread_postfork(void); void _rtld_thread_prefork(void); +void _rtld_setthreaded(void); extern char **environ; @@ -226,3 +227,10 @@ void _rtld_thread_childfork(void) { } + +#pragma weak _rtld_setthreaded +int write(int, void *, size_t); +void 
+_rtld_setthreaded(void) +{ +} diff --git a/lib/libthread_xu/thread/thr_kern.c b/lib/libthread_xu/thread/thr_kern.c index c15c2a39cd..410af80c94 100644 --- a/lib/libthread_xu/thread/thr_kern.c +++ b/lib/libthread_xu/thread/thr_kern.c @@ -51,6 +51,7 @@ _thr_setthreaded(int threaded) if (((threaded == 0) ^ (__isthreaded == 0)) == 0) return (0); __isthreaded = threaded; + _rtld_setthreaded(threaded); #if 0 /* save for later. */ if (threaded != 0) diff --git a/lib/libthread_xu/thread/thr_private.h b/lib/libthread_xu/thread/thr_private.h index 97b2733ad3..bc08a7a329 100644 --- a/lib/libthread_xu/thread/thr_private.h +++ b/lib/libthread_xu/thread/thr_private.h @@ -678,6 +678,7 @@ void _thr_ref_delete(struct pthread *, struct pthread *); void _thr_ref_delete_unlocked(struct pthread *, struct pthread *); int _thr_find_thread(struct pthread *, struct pthread *, int); void _thr_malloc_init(void); +void _rtld_setthreaded(int); void _thr_rtld_init(void); void _thr_rtld_fini(void); int _thr_stack_alloc(struct pthread_attr *); diff --git a/libexec/rtld-elf/Symbol.map b/libexec/rtld-elf/Symbol.map index 3490faf9f4..5412719bb4 100644 --- a/libexec/rtld-elf/Symbol.map +++ b/libexec/rtld-elf/Symbol.map @@ -26,6 +26,7 @@ DFprivate_1.0 { _rtld_thread_prefork; _rtld_thread_postfork; _rtld_thread_childfork; + _rtld_setthreaded; _rtld_addr_phdr; _rtld_get_stack_prot; _r_debug_postinit; diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 9d599613d6..4eefb1262c 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -4585,6 +4585,7 @@ _rtld_allocate_tls(void) wlock_acquire(rtld_bind_lock, &lockstate); new_tcb = allocate_tls(obj_list); lock_release(rtld_bind_lock, &lockstate); + return (new_tcb); } diff --git a/libexec/rtld-elf/rtld_lock.c b/libexec/rtld-elf/rtld_lock.c index 124b45a862..d6165b173c 100644 --- a/libexec/rtld-elf/rtld_lock.c +++ b/libexec/rtld-elf/rtld_lock.c @@ -50,6 +50,9 @@ #include #include +#include +#include + #include "debug.h" #include 
"rtld.h" #include "rtld_machdep.h" @@ -57,12 +60,13 @@ extern pid_t __sys_getpid(void); #define WAFLAG 0x1 /* A writer holds the lock */ -#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */ +#define SLFLAG 0x2 /* Sleep pending on lock */ +#define RC_INCR 0x4 /* Adjusts count of readers desiring lock */ struct Struct_Lock { volatile u_int lock; - int tid; /* owner (exclusive) */ int count; /* recursion (exclusive) */ + void *owner; /* owner (exclusive) - tls_get_tcb() */ sigset_t savesigmask; /* first exclusive owner sets mask */ } __cachealign; @@ -78,11 +82,31 @@ rtld_lock_t rtld_phdr_lock = &phdr_lock; rtld_lock_t rtld_bind_lock = &bind_lock; rtld_lock_t rtld_libc_lock = &libc_lock; +static int _rtld_isthreaded; + +void _rtld_setthreaded(int threaded); + +void +_rtld_setthreaded(int threaded) +{ + _rtld_isthreaded = threaded; +} + +static __inline +void * +myid(void) +{ + if (_rtld_isthreaded) { + return(tls_get_tcb()); + } + return (void *)(intptr_t)1; +} + void rlock_acquire(rtld_lock_t lock, RtldLockState *state) { + void *tid = myid(); int v; - int tid = 0; v = lock->lock; cpu_ccfence(); @@ -93,39 +117,46 @@ rlock_acquire(rtld_lock_t lock, RtldLockState *state) break; } } else { - if (tid == 0) - tid = lwp_gettid(); - if (lock->tid == tid) { + if (lock->owner == tid) { ++lock->count; state->lockstate = RTLD_LOCK_WLOCKED; break; } - umtx_sleep(&lock->lock, v, 0); - v = lock->lock; - cpu_ccfence(); + if (atomic_fcmpset_int(&lock->lock, &v, v | SLFLAG)) { + umtx_sleep(&lock->lock, v, 0); + } } + cpu_ccfence(); } } void wlock_acquire(rtld_lock_t lock, RtldLockState *state) { + void *tid = myid(); sigset_t tmp_oldsigmask; - int tid = lwp_gettid(); + int v; - if (lock->tid == tid) { + if (lock->owner == tid) { ++lock->count; state->lockstate = RTLD_LOCK_WLOCKED; return; } sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); + v = lock->lock; for (;;) { - if (atomic_cmpset_acq_int(&lock->lock, 0, WAFLAG)) - break; - umtx_sleep(&lock->lock, 0, 0); + if 
((v & ~SLFLAG) == 0) { + if (atomic_fcmpset_int(&lock->lock, &v, WAFLAG)) + break; + } else { + if (atomic_fcmpset_int(&lock->lock, &v, v | SLFLAG)) { + umtx_sleep(&lock->lock, v, 0); + } + } + cpu_ccfence(); } - lock->tid = tid; + lock->owner = tid; lock->count = 1; lock->savesigmask = tmp_oldsigmask; state->lockstate = RTLD_LOCK_WLOCKED; @@ -141,14 +172,18 @@ lock_release(rtld_lock_t lock, RtldLockState *state) return; if ((lock->lock & WAFLAG) == 0) { v = atomic_fetchadd_int(&lock->lock, -RC_INCR) - RC_INCR; - if (v == 0) + if (v == SLFLAG) { + atomic_clear_int(&lock->lock, SLFLAG); umtx_wakeup(&lock->lock, 0); + } } else if (--lock->count == 0) { tmp_oldsigmask = lock->savesigmask; - lock->tid = 0; + lock->owner = NULL; v = atomic_fetchadd_int(&lock->lock, -WAFLAG) - WAFLAG; - if (v == 0) + if (v == SLFLAG) { + atomic_clear_int(&lock->lock, SLFLAG); umtx_wakeup(&lock->lock, 0); + } sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); } state->lockstate = RTLD_LOCK_UNLOCKED; -- 2.41.0