From eeb6957159b8d414a4ad6de31c473c8cd565972b Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 9 Aug 2019 15:22:18 -0700 Subject: [PATCH] rtld - Support static TLS bindings for late-loaded shared libraries * Allow late (manual) dlopen()s to load shared libraries which use static TLS variables, as long as there is space. Do proper late-binding and initialize the area for all threads. * rtld will cache a symbol lookup on first-need for: "_pthread_distribute_static_tls" and then call it as needed to initialize late-bound static TLS space. This symbol is weakly bound to __libc_distribute_static_tls in libc, and strongly overridden by _libthread_distribute_static_tls in libthread_xu. * Fixes mesa glx-tls and others. * Test code from FreeBSD. Also tested with other combinations including a pthread_create() and -static compilation. https://github.com/dumbbell/test-tls-initial-exec --- include/dlfcn.h | 1 + lib/libc/gen/Symbol.map | 1 + lib/libc/gen/elf_utils.c | 18 +++++ lib/libc/include/libc_private.h | 3 + lib/libthread_xu/pthread.map | 1 + lib/libthread_xu/thread/Makefile.inc | 1 + lib/libthread_xu/thread/thr_distribute.c | 44 ++++++++++++ libexec/rtld-elf/rtld.c | 86 ++++++++++++++++++++---- libexec/rtld-elf/rtld.h | 2 + 9 files changed, 144 insertions(+), 13 deletions(-) create mode 100644 lib/libthread_xu/thread/thr_distribute.c diff --git a/include/dlfcn.h b/include/dlfcn.h index 7b62a5cf4b..33ba5d1041 100644 --- a/include/dlfcn.h +++ b/include/dlfcn.h @@ -62,6 +62,7 @@ #define RTLD_NEXT ((void *) -1) /* Search subsequent objects. */ #define RTLD_DEFAULT ((void *) -2) /* Use default search algorithm. */ #define RTLD_SELF ((void *) -3) /* Search the caller itself. */ +#define RTLD_ALL ((void *) -4) /* Search everything. 
*/ #if __BSD_VISIBLE /* diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index fdeec56f2c..6e305dd3e4 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -546,6 +546,7 @@ DFprivate_1.0 { _pthread_condattr_setclock; _pthread_condattr_setpshared; _pthread_detach; + _pthread_distribute_static_tls; _pthread_equal; _pthread_exit; _pthread_getaffinity_np; diff --git a/lib/libc/gen/elf_utils.c b/lib/libc/gen/elf_utils.c index 7270fe8ace..7e2bca400a 100644 --- a/lib/libc/gen/elf_utils.c +++ b/lib/libc/gen/elf_utils.c @@ -26,9 +26,14 @@ * $FreeBSD: head/lib/libc/gen/elf_utils.c 217154 2011-01-08 17:13:43Z kib $ */ +#include #include +#include +#include +#include "static_tls.h" int __elf_phdr_match_addr(struct dl_phdr_info *, void *); +void __libc_distribute_static_tls(size_t, void *, size_t, size_t); int __elf_phdr_match_addr(struct dl_phdr_info *phdr_info, void *addr) @@ -47,3 +52,16 @@ __elf_phdr_match_addr(struct dl_phdr_info *phdr_info, void *addr) } return (i != phdr_info->dlpi_phnum); } + +void +__libc_distribute_static_tls(size_t offset, void *src, size_t len, + size_t total_len) +{ + uintptr_t tlsbase; + + tlsbase = _libc_get_static_tls_base(offset); + memcpy((void *)tlsbase, src, len); + memset((char *)tlsbase + len, 0, total_len - len); +} + +__weak_reference(__libc_distribute_static_tls, _pthread_distribute_static_tls); diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h index ecacb9a057..c20bcee268 100644 --- a/lib/libc/include/libc_private.h +++ b/lib/libc/include/libc_private.h @@ -35,6 +35,7 @@ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ +#include #include /* @@ -106,6 +107,8 @@ void _nmalloc_thr_childfork(void); struct dl_phdr_info; int __elf_phdr_match_addr(struct dl_phdr_info *, void *); +void __libc_distribute_static_tls(__size_t, void *, __size_t, __size_t); +__uintptr_t __libc_static_tls_base(__size_t); /* * libc should use libc_dlopen internally, which respects a global diff --git 
a/lib/libthread_xu/pthread.map b/lib/libthread_xu/pthread.map index f1628a54f6..db4e3d71a3 100644 --- a/lib/libthread_xu/pthread.map +++ b/lib/libthread_xu/pthread.map @@ -87,6 +87,7 @@ global: _pthread_condattr_setpshared; _pthread_create; _pthread_detach; + _pthread_distribute_static_tls; _pthread_equal; _pthread_exit; _pthread_getaffinity_np; diff --git a/lib/libthread_xu/thread/Makefile.inc b/lib/libthread_xu/thread/Makefile.inc index 19b4d23bb3..d93993b1cc 100644 --- a/lib/libthread_xu/thread/Makefile.inc +++ b/lib/libthread_xu/thread/Makefile.inc @@ -15,6 +15,7 @@ SRCS+= \ thr_create.c \ thr_ctrdtr.c \ thr_detach.c \ + thr_distribute.c \ thr_equal.c \ thr_event.c \ thr_exit.c \ diff --git a/lib/libthread_xu/thread/thr_distribute.c b/lib/libthread_xu/thread/thr_distribute.c new file mode 100644 index 0000000000..4eb5711684 --- /dev/null +++ b/lib/libthread_xu/thread/thr_distribute.c @@ -0,0 +1,44 @@ + +#include "namespace.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _PTHREADS_DEBUGGING +#include +#endif +#include "un-namespace.h" + +#include "thr_private.h" +void +_libthread_distribute_static_tls(size_t offset, void *src, + size_t len, size_t total_len); + +void +_libthread_distribute_static_tls(size_t offset, void *src, + size_t len, size_t total_len) +{ + struct pthread *curthread = tls_get_curthread(); + struct pthread *td; + char *tlsbase; + + THREAD_LIST_LOCK(curthread); + TAILQ_FOREACH(td, &_thread_list, tle) { + tlsbase = (char *)td->tcb - offset; + memcpy(tlsbase, src, len); + memset(tlsbase + len, 0, total_len - len); + } + THREAD_LIST_UNLOCK(curthread); +} + +__strong_reference(_libthread_distribute_static_tls, _pthread_distribute_static_tls); diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 4eefb1262c..d14cc95413 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -85,6 +85,7 @@ static void 
digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *, const Elf_Dyn *); static void digest_dynamic(Obj_Entry *, int); static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *); +static void distribute_static_tls(Objlist *, RtldLockState *); static Obj_Entry *dlcheck(void *); static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate); @@ -1209,8 +1210,8 @@ digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath, obj->textrel = true; if (dynp->d_un.d_val & DF_BIND_NOW) obj->bind_now = true; - /*if (dynp->d_un.d_val & DF_STATIC_TLS) - ;*/ + if (dynp->d_un.d_val & DF_STATIC_TLS) + obj->static_tls = true; break; case DT_FLAGS_1: @@ -3053,8 +3054,20 @@ dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, objlist_push_tail(&list_global, obj); if (*old_obj_tail != NULL) { /* We loaded something new. */ assert(*old_obj_tail == obj); - result = load_needed_objects(obj, - lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); + if ((lo_flags & RTLD_LO_EARLY) == 0 && obj->static_tls && + !allocate_tls_offset(obj)) { + _rtld_error("%s: No space available " + "for static TLS", + obj->path); + result = -1; + } else { + result = 0; + } + if (result == 0) { + result = load_needed_objects( + obj, + lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); + } init_dag(obj); ref_dag(obj); if (result != -1) @@ -3114,8 +3127,10 @@ dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, name); GDB_STATE(RT_CONSISTENT,obj ? 
&obj->linkmap : NULL); - if (!(lo_flags & RTLD_LO_EARLY)) { + if ((lo_flags & RTLD_LO_EARLY) == 0) { map_stacks_exec(lockstate); + if (obj) + distribute_static_tls(&initlist, lockstate); } if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW, @@ -3166,12 +3181,17 @@ do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_NEXT || - handle == RTLD_DEFAULT || handle == RTLD_SELF) { - - if ((obj = obj_from_addr(retaddr)) == NULL) { - _rtld_error("Cannot determine caller's shared object"); - lock_release(rtld_bind_lock, &lockstate); - return NULL; + handle == RTLD_DEFAULT || handle == RTLD_SELF || + handle == RTLD_ALL) { + + if (handle != RTLD_ALL) { + if ((obj = obj_from_addr(retaddr)) == NULL) { + _rtld_error("Cannot determine caller's shared object"); + lock_release(rtld_bind_lock, &lockstate); + return NULL; + } + } else { + obj = obj_list; } if (handle == NULL) { /* Just the caller's shared object. */ res = symlook_obj(&req, obj); @@ -3180,7 +3200,8 @@ do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, defobj = req.defobj_out; } } else if (handle == RTLD_NEXT || /* Objects after caller's */ - handle == RTLD_SELF) { /* ... caller included */ + handle == RTLD_SELF || /* ... 
caller included */ + handle == RTLD_ALL) { /* All Objects */ if (handle == RTLD_NEXT) obj = obj->next; for (; obj != NULL; obj = obj->next) { @@ -4454,8 +4475,10 @@ allocate_tls(Obj_Entry *objs) addr = (Elf_Addr)tcb - obj->tlsoffset; memset((void *)(addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); - if (obj->tlsinit) + if (obj->tlsinit) { memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); + obj->static_tls_copied = true; + } dtv[obj->tlsindex + 1] = addr; } } @@ -4872,6 +4895,43 @@ map_stacks_exec(RtldLockState *lockstate) */ } +/* + * Only called after all primary shared libraries are loaded (EARLY is + * not set). Resolves the static TLS distribution function at first-call. + * This is typically a weak libc symbol that is overridden by the threading + * library. + */ +static void +distribute_static_tls(Objlist *list, RtldLockState *lockstate) +{ + Objlist_Entry *elm; + Obj_Entry *obj; + static void (*dtlsfunc)(size_t, void *, size_t, size_t); + + /* + * First time, resolve "_pthread_distribute_static_tls". + */ + if (dtlsfunc == NULL) { + dtlsfunc = (void *)dlfunc(RTLD_ALL, + "_pthread_distribute_static_tls"); + if (dtlsfunc == NULL) + return; + } + + /* + * Initialize static TLS data for the object list using the callback + * function (to either libc or pthreads). 
+ */ + STAILQ_FOREACH(elm, list, link) { + obj = elm->obj; + if (/*obj->marker ||*/ !obj->tls_done || obj->static_tls_copied) + continue; + dtlsfunc(obj->tlsoffset, obj->tlsinit, + obj->tlsinitsize, obj->tlssize); + obj->static_tls_copied = true; + } +} + void symlook_init(SymLook *dst, const char *name) { diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index 0e94c64e7b..f079a9d501 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -254,6 +254,8 @@ typedef struct Struct_Obj_Entry { bool valid_hash_sysv : 1; /* A valid System V hash hash tag is available */ bool valid_hash_gnu : 1; /* A valid GNU hash tag is available */ bool relro_protected : 1; /* relro section has been protected */ + bool static_tls : 1; /* Object wants to use static TLS space */ + bool static_tls_copied : 1; /* Object's static TLS space initialized */ struct link_map linkmap; /* For GDB and dlinfo() */ Objlist dldags; /* Object belongs to these dlopened DAGs (%) */ -- 2.41.0