rtld - Support static TLS bindings for late-loaded shared libraries
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 9 Aug 2019 22:22:18 +0000 (15:22 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 9 Aug 2019 22:31:44 +0000 (15:31 -0700)
* Allow late (manual) dlopen()s to load shared libraries which
  use static TLS variables, as long as there is space.  Do proper
  late-binding and initialize the area for all threads.

* rtld will cache a symbol lookup on first-need for:
  "_pthread_distribute_static_tls" and then call it as needed to
  initialize late-bound static TLS space.

  This symbol is weakly bound to __libc_distribute_static_tls in libc,
  and strongly overridden by _libthread_distribute_static_tls in
  libthread_xu.

* Fixes mesa glx-tls and others.

* Test code from FreeBSD.  Also tested with other combinations
  including a pthread_create() and -static compilation.

https://github.com/dumbbell/test-tls-initial-exec

include/dlfcn.h
lib/libc/gen/Symbol.map
lib/libc/gen/elf_utils.c
lib/libc/include/libc_private.h
lib/libthread_xu/pthread.map
lib/libthread_xu/thread/Makefile.inc
lib/libthread_xu/thread/thr_distribute.c [new file with mode: 0644]
libexec/rtld-elf/rtld.c
libexec/rtld-elf/rtld.h

index 7b62a5c..33ba5d1 100644 (file)
@@ -62,6 +62,7 @@
 #define        RTLD_NEXT       ((void *) -1)   /* Search subsequent objects. */
 #define        RTLD_DEFAULT    ((void *) -2)   /* Use default search algorithm. */
 #define        RTLD_SELF       ((void *) -3)   /* Search the caller itself. */
+#define        RTLD_ALL        ((void *) -4)   /* Search everything. */
 
 #if __BSD_VISIBLE
 /*
index fdeec56..6e305dd 100644 (file)
@@ -546,6 +546,7 @@ DFprivate_1.0 {
     _pthread_condattr_setclock;
     _pthread_condattr_setpshared;
     _pthread_detach;
+    _pthread_distribute_static_tls;
     _pthread_equal;
     _pthread_exit;
     _pthread_getaffinity_np;
index 7270fe8..7e2bca4 100644 (file)
  * $FreeBSD: head/lib/libc/gen/elf_utils.c 217154 2011-01-08 17:13:43Z kib $
  */
 
+#include <sys/types.h>
 #include <link.h>
+#include <string.h>
+#include <machine/tls.h>
+#include "static_tls.h"
 
 int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
+void __libc_distribute_static_tls(size_t, void *, size_t, size_t);
 
 int
 __elf_phdr_match_addr(struct dl_phdr_info *phdr_info, void *addr)
@@ -47,3 +52,16 @@ __elf_phdr_match_addr(struct dl_phdr_info *phdr_info, void *addr)
        }
        return (i != phdr_info->dlpi_phnum);
 }
+
+void   /* Initialize one static TLS area: copy the init image, zero-fill the remainder. */
+__libc_distribute_static_tls(size_t offset, void *src, size_t len,
+                            size_t total_len)
+{
+       uintptr_t tlsbase;
+
+       tlsbase = _libc_get_static_tls_base(offset);    /* address of the area for 'offset'; helper presumably comes from static_tls.h -- confirm */
+       memcpy((void *)tlsbase, src, len);      /* first 'len' bytes come from the init image 'src' */
+       memset((char *)tlsbase + len, 0, total_len - len);      /* remaining bytes are zeroed; assumes total_len >= len */
+}
+
+__weak_reference(__libc_distribute_static_tls, _pthread_distribute_static_tls);
index ecacb9a..c20bcee 100644 (file)
@@ -35,6 +35,7 @@
 #ifndef _LIBC_PRIVATE_H_
 #define _LIBC_PRIVATE_H_
 
+#include <machine/types.h>
 #include <sys/_pthreadtypes.h>
 
 /*
@@ -106,6 +107,8 @@ void _nmalloc_thr_childfork(void);
 
 struct dl_phdr_info;
 int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
+void __libc_distribute_static_tls(__size_t, void *, __size_t, __size_t);
+__uintptr_t __libc_static_tls_base(__size_t);
 
 /*
  * libc should use libc_dlopen internally, which respects a global
index f1628a5..db4e3d7 100644 (file)
@@ -87,6 +87,7 @@ global:
        _pthread_condattr_setpshared;
        _pthread_create;
        _pthread_detach;
+       _pthread_distribute_static_tls;
        _pthread_equal;
        _pthread_exit;
        _pthread_getaffinity_np;
index 19b4d23..d93993b 100644 (file)
@@ -15,6 +15,7 @@ SRCS+= \
        thr_create.c \
        thr_ctrdtr.c \
        thr_detach.c \
+       thr_distribute.c \
        thr_equal.c \
        thr_event.c \
        thr_exit.c \
diff --git a/lib/libthread_xu/thread/thr_distribute.c b/lib/libthread_xu/thread/thr_distribute.c
new file mode 100644 (file)
index 0000000..4eb5711
--- /dev/null
@@ -0,0 +1,44 @@
+
+#include "namespace.h"
+#include <machine/tls.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef _PTHREADS_DEBUGGING
+#include <stdio.h>
+#endif
+#include "un-namespace.h"
+
+#include "thr_private.h"
+void
+_libthread_distribute_static_tls(size_t offset, void *src,
+                                size_t len, size_t total_len);
+
+void   /* Initialize the static TLS area at 'offset' for every thread on _thread_list. */
+_libthread_distribute_static_tls(size_t offset, void *src,
+                                size_t len, size_t total_len)
+{
+       struct pthread *curthread = tls_get_curthread();
+       struct pthread *td;
+       char *tlsbase;
+
+       THREAD_LIST_LOCK(curthread);    /* thread list lock is held for the whole walk */
+       TAILQ_FOREACH(td, &_thread_list, tle) {
+               tlsbase = (char *)td->tcb - offset;     /* area starts 'offset' bytes below this thread's tcb pointer */
+               memcpy(tlsbase, src, len);      /* first 'len' bytes come from the init image 'src' */
+               memset(tlsbase + len, 0, total_len - len);      /* remaining bytes are zeroed; assumes total_len >= len */
+       }
+       THREAD_LIST_UNLOCK(curthread);
+}
+
+__strong_reference(_libthread_distribute_static_tls, _pthread_distribute_static_tls);
index 4eefb12..d14cc95 100644 (file)
@@ -85,6 +85,7 @@ static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *,
     const Elf_Dyn *);
 static void digest_dynamic(Obj_Entry *, int);
 static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *);
+static void distribute_static_tls(Objlist *, RtldLockState *);
 static Obj_Entry *dlcheck(void *);
 static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj,
     int lo_flags, int mode, RtldLockState *lockstate);
@@ -1209,8 +1210,8 @@ digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath,
                    obj->textrel = true;
                if (dynp->d_un.d_val & DF_BIND_NOW)
                    obj->bind_now = true;
-               /*if (dynp->d_un.d_val & DF_STATIC_TLS)
-                   ;*/
+               if (dynp->d_un.d_val & DF_STATIC_TLS)
+                   obj->static_tls = true;
            break;
 
        case DT_FLAGS_1:
@@ -3053,8 +3054,20 @@ dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags,
            objlist_push_tail(&list_global, obj);
        if (*old_obj_tail != NULL) {            /* We loaded something new. */
            assert(*old_obj_tail == obj);
-           result = load_needed_objects(obj,
-               lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY));
+           if ((lo_flags & RTLD_LO_EARLY) == 0 && obj->static_tls &&
+               !allocate_tls_offset(obj)) {
+                   _rtld_error("%s: No space available "
+                               "for static TLS",
+                               obj->path);
+                   result = -1;
+           } else {
+                   result = 0;
+           }
+           if (result == 0) {
+               result = load_needed_objects(
+                           obj,
+                           lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY));
+           }
            init_dag(obj);
            ref_dag(obj);
            if (result != -1)
@@ -3114,8 +3127,10 @@ dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags,
        name);
     GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL);
 
-    if (!(lo_flags & RTLD_LO_EARLY)) {
+    if ((lo_flags & RTLD_LO_EARLY) == 0) {
        map_stacks_exec(lockstate);
+       if (obj)
+           distribute_static_tls(&initlist, lockstate);
     }
 
     if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW,
@@ -3166,12 +3181,17 @@ do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve,
     if (sigsetjmp(lockstate.env, 0) != 0)
            lock_upgrade(rtld_bind_lock, &lockstate);
     if (handle == NULL || handle == RTLD_NEXT ||
-       handle == RTLD_DEFAULT || handle == RTLD_SELF) {
-
-       if ((obj = obj_from_addr(retaddr)) == NULL) {
-           _rtld_error("Cannot determine caller's shared object");
-           lock_release(rtld_bind_lock, &lockstate);
-           return NULL;
+       handle == RTLD_DEFAULT || handle == RTLD_SELF ||
+       handle == RTLD_ALL) {
+
+       if (handle != RTLD_ALL) {
+               if ((obj = obj_from_addr(retaddr)) == NULL) {
+                   _rtld_error("Cannot determine caller's shared object");
+                   lock_release(rtld_bind_lock, &lockstate);
+                   return NULL;
+               }
+       } else {
+               obj = obj_list;
        }
        if (handle == NULL) {   /* Just the caller's shared object. */
            res = symlook_obj(&req, obj);
@@ -3180,7 +3200,8 @@ do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve,
                defobj = req.defobj_out;
            }
        } else if (handle == RTLD_NEXT || /* Objects after caller's */
-                  handle == RTLD_SELF) { /* ... caller included */
+                  handle == RTLD_SELF || /* ... caller included */
+                  handle == RTLD_ALL) {  /* All Objects */
            if (handle == RTLD_NEXT)
                obj = obj->next;
            for (; obj != NULL; obj = obj->next) {
@@ -4454,8 +4475,10 @@ allocate_tls(Obj_Entry *objs)
            addr = (Elf_Addr)tcb - obj->tlsoffset;
            memset((void *)(addr + obj->tlsinitsize),
                   0, obj->tlssize - obj->tlsinitsize);
-           if (obj->tlsinit)
+           if (obj->tlsinit) {
                memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize);
+               obj->static_tls_copied = true;
+           }
            dtv[obj->tlsindex + 1] = addr;
        }
     }
@@ -4872,6 +4895,43 @@ map_stacks_exec(RtldLockState *lockstate)
         */
 }
 
+/*
+ * Only called after all primary shared libraries are loaded (EARLY is
+ * not set).  Resolves the static TLS distribution function at first-call.
+ * This is typically a weak libc symbol that is overridden by the threading
+ * library.
+ */
+static void
+distribute_static_tls(Objlist *list, RtldLockState *lockstate) /* NOTE(review): lockstate is not referenced in this body */
+{
+       Objlist_Entry *elm;
+       Obj_Entry *obj;
+       static void (*dtlsfunc)(size_t, void *, size_t, size_t);        /* static: the resolved callback is kept across calls */
+
+       /*
+        * First time, resolve "_pthread_distribute_static_tls".
+        */
+       if (dtlsfunc == NULL) {
+               dtlsfunc = (void *)dlfunc(RTLD_ALL,
+                                         "_pthread_distribute_static_tls");
+               if (dtlsfunc == NULL)
+                       return; /* lookup failed: nothing is initialized; the lookup is retried on the next call */
+       }
+
+       /*
+        * Initialize static TLS data for the object list using the callback
+        * function (to either libc or pthreads).
+        */
+       STAILQ_FOREACH(elm, list, link) {
+               obj = elm->obj;
+               if (/*obj->marker ||*/ !obj->tls_done || obj->static_tls_copied)
+                       continue;       /* skip objects without tls_done set or whose area was already initialized */
+               dtlsfunc(obj->tlsoffset, obj->tlsinit,
+                        obj->tlsinitsize, obj->tlssize);
+               obj->static_tls_copied = true;  /* mark so a later call does not initialize this object again */
+       }
+}
+
 void
 symlook_init(SymLook *dst, const char *name)
 {
index 0e94c64..f079a9d 100644 (file)
@@ -254,6 +254,8 @@ typedef struct Struct_Obj_Entry {
     bool valid_hash_sysv : 1;  /* A valid System V hash hash tag is available */
     bool valid_hash_gnu : 1;   /* A valid GNU hash tag is available */
     bool relro_protected : 1;  /* relro section has been protected */
+    bool static_tls : 1;       /* Object wants to use static TLS space */
+    bool static_tls_copied : 1;        /* Object's static TLS space initialized */
 
     struct link_map linkmap;   /* For GDB and dlinfo() */
     Objlist dldags;            /* Object belongs to these dlopened DAGs (%) */