2 * Copyright 1999, 2000 John D. Polstra.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * $FreeBSD: src/libexec/rtld-elf/i386/lockdflt.c,v 1.5.2.4 2002/07/11 23:52:32 jdp Exp $
29 * Thread locking implementation for the dynamic linker.
31 * On 80486 and later CPUs we use the "simple, non-scalable
32 * reader-preference lock" from:
34 * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
35 * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
36 * Principles and Practice of Parallel Programming, April 1991.
38 * In this algorithm the lock is a single word. Its low-order bit is
39 * set when a writer holds the lock. The remaining high-order bits
40 * contain a count of readers desiring the lock. The algorithm requires
41 * atomic "compare_and_store" and "add" operations.
43 * The "compare_and_store" operation requires the "cmpxchg" instruction
44 * on the x86. Unfortunately, the 80386 CPU does not support that
45 * instruction -- only the 80486 and later models support it. So on the
46 * 80386 we must use simple test-and-set exclusive locks instead. We
47 * determine which kind of lock to use by trying to execute a "cmpxchg"
48 * instruction and catching the SIGILL which results on the 80386.
59 #define CACHE_LINE_SIZE 32
61 #define WAFLAG 0x1 /* A writer holds the lock */
62 #define RC_INCR 0x2 /* Adjusts count of readers desiring lock */
64 typedef struct Struct_Lock {
69 static sigset_t fullsigmask, oldsigmask;
72 cmpxchgl(int old, int new, volatile int *m)
76 __asm __volatile ("lock; cmpxchgl %2, %0"
77 : "+m"(*m), "=a"(result)
85 xchgl(int v, volatile int *m)
89 __asm __volatile ("xchgl %0, %1"
90 : "=r"(result), "+m"(*m)
97 lock_create(void *context)
105 * Arrange for the lock to occupy its own cache line. First, we
106 * optimistically allocate just a cache line, hoping that malloc
107 * will give us a well-aligned block of memory. If that doesn't
108 * work, we allocate a larger block and take a well-aligned cache line from it.
111 base = xmalloc(CACHE_LINE_SIZE);
113 if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
115 base = xmalloc(2 * CACHE_LINE_SIZE);
117 if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
118 p += CACHE_LINE_SIZE - r;
127 lock_destroy(void *lock)
129 Lock *l = (Lock *)lock;
135 * Crude exclusive locks for the 80386, which does not support the
136 * cmpxchg instruction.
139 lock80386_acquire(void *lock)
141 Lock *l = (Lock *)lock;
142 sigset_t tmp_oldsigmask;
145 sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
146 if (xchgl(1, &l->lock) == 0)
148 sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
152 oldsigmask = tmp_oldsigmask;
156 lock80386_release(void *lock)
158 Lock *l = (Lock *)lock;
161 sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
165 * Better reader/writer locks for the 80486 and later CPUs.
168 rlock_acquire(void *lock)
170 Lock *l = (Lock *)lock;
172 atomic_add_int(&l->lock, RC_INCR);
173 while (l->lock & WAFLAG)
178 wlock_acquire(void *lock)
180 Lock *l = (Lock *)lock;
181 sigset_t tmp_oldsigmask;
184 sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
185 if (cmpxchgl(0, WAFLAG, &l->lock) == 0)
187 sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
189 oldsigmask = tmp_oldsigmask;
193 rlock_release(void *lock)
195 Lock *l = (Lock *)lock;
197 atomic_add_int(&l->lock, -RC_INCR);
201 wlock_release(void *lock)
203 Lock *l = (Lock *)lock;
205 atomic_add_int(&l->lock, -WAFLAG);
206 sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
210 * Code to determine at runtime whether the CPU supports the cmpxchg
211 * instruction. This instruction allows us to use locks that are more
212 * efficient, but it didn't exist on the 80386.
214 static jmp_buf sigill_env;
219 longjmp(sigill_env, 1);
223 cpu_supports_cmpxchg(void)
225 struct sigaction act, oact;
229 memset(&act, 0, sizeof act);
230 act.sa_handler = sigill;
231 sigemptyset(&act.sa_mask);
234 sigaction(SIGILL, &act, &oact);
235 if (setjmp(sigill_env) == 0) {
237 cmpxchgl(0, 1, &lock);
241 sigaction(SIGILL, &oact, NULL);
246 lockdflt_init(LockInfo *li)
249 li->context_destroy = NULL;
250 li->lock_create = lock_create;
251 li->lock_destroy = lock_destroy;
252 if (cpu_supports_cmpxchg()) {
253 /* Use fast locks that require an 80486 or later. */
254 li->rlock_acquire = rlock_acquire;
255 li->wlock_acquire = wlock_acquire;
256 li->rlock_release = rlock_release;
257 li->wlock_release = wlock_release;
259 /* It's a cruddy old 80386. */
260 li->rlock_acquire = li->wlock_acquire = lock80386_acquire;
261 li->rlock_release = li->wlock_release = lock80386_release;
264 * Construct a mask to block all signals except traps which might
265 * conceivably be generated within the dynamic linker itself.
267 sigfillset(&fullsigmask);
268 sigdelset(&fullsigmask, SIGILL);
269 sigdelset(&fullsigmask, SIGTRAP);
270 sigdelset(&fullsigmask, SIGABRT);
271 sigdelset(&fullsigmask, SIGEMT);
272 sigdelset(&fullsigmask, SIGFPE);
273 sigdelset(&fullsigmask, SIGBUS);
274 sigdelset(&fullsigmask, SIGSEGV);
275 sigdelset(&fullsigmask, SIGSYS);