rename amd64 architecture to x86_64
[dragonfly.git] / libexec / rtld-elf / x86_64 / lockdflt.c
CommitLineData
f66e9c25
SS
1/*-
2 * Copyright 1999, 2000 John D. Polstra.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 * $FreeBSD: src/libexec/rtld-elf/i386/lockdflt.c,v 1.5.2.4 2002/07/11 23:52:32 jdp Exp $
f66e9c25
SS
26 */
27
28/*
29 * Thread locking implementation for the dynamic linker.
30 *
31 * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
32 * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
33 * Principles and Practice of Parallel Programming, April 1991.
34 *
35 * In this algorithm the lock is a single word. Its low-order bit is
36 * set when a writer holds the lock. The remaining high-order bits
37 * contain a count of readers desiring the lock. The algorithm requires
38 * atomic "compare_and_store" and "add" operations.
39 */
40
41#include <setjmp.h>
42#include <signal.h>
43#include <stdlib.h>
44#include <time.h>
45
46#include "debug.h"
47#include "rtld.h"
48
/*
 * Layout of the lock word: bit 0 is the writer flag, and the bits above
 * it count the readers that want the lock, so the reader count moves in
 * steps of RC_INCR.
 */
#define WAFLAG		0x1	/* Set while a writer holds the lock */
#define RC_INCR		0x2	/* One reader's contribution to the count */

/*
 * A reader/writer lock.
 */
typedef struct Struct_Lock {
	volatile int	 lock;	/* Writer flag plus reader count */
	void		*base;	/* Start of the allocation holding this lock */
} Lock;
56
57static sigset_t fullsigmask, oldsigmask;
58
/*
 * Atomic compare-and-exchange on an int.
 *
 * If *m equals `old`, `new` is stored into *m.  In either case the
 * value *m held before the instruction ran is returned, so the store
 * happened exactly when the return value equals `old`.
 *
 * The "memory" clobber makes the statement a compiler barrier too.
 * This primitive is what takes the write lock, so once it is inlined
 * the compiler must not keep memory values cached across it or move
 * accesses to the protected data to the other side of it.
 */
static inline int
cmpxchgl(int old, int new, volatile int *m)
{
	int result;

	__asm __volatile ("lock; cmpxchgl %2, %0"
	    : "+m"(*m), "=a"(result)	/* %eax: expected in, previous out */
	    : "r"(new), "1"(old)
	    : "cc", "memory");

	return result;
}
71
72static void *
73lock_create(void *context)
74{
75 void *base;
76 char *p;
77 uintptr_t r;
78 Lock *l;
79
80 /*
81 * Arrange for the lock to occupy its own cache line. First, we
82 * optimistically allocate just a cache line, hoping that malloc
83 * will give us a well-aligned block of memory. If that doesn't
84 * work, we allocate a larger block and take a well-aligned cache
85 * line from it.
86 */
87 base = xmalloc(CACHE_LINE_SIZE);
88 p = (char *)base;
89 if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
90 free(base);
91 base = xmalloc(2 * CACHE_LINE_SIZE);
92 p = (char *)base;
93 if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
94 p += CACHE_LINE_SIZE - r;
95 }
96 l = (Lock *)p;
97 l->base = base;
98 l->lock = 0;
99 return l;
100}
101
102static void
103lock_destroy(void *lock)
104{
105 Lock *l = (Lock *)lock;
106
107 free(l->base);
108}
109
110/*
111 * Better reader/writer locks for the 80486 and later CPUs.
112 */
113static void
114rlock_acquire(void *lock)
115{
116 Lock *l = (Lock *)lock;
117
118 atomic_add_int(&l->lock, RC_INCR);
119 while (l->lock & WAFLAG)
120 ; /* Spin */
121}
122
123static void
124wlock_acquire(void *lock)
125{
126 Lock *l = (Lock *)lock;
127 sigset_t tmp_oldsigmask;
128
129 for ( ; ; ) {
130 sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
131 if (cmpxchgl(0, WAFLAG, &l->lock) == 0)
132 break;
133 sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
134 }
135 oldsigmask = tmp_oldsigmask;
136}
137
138static void
139rlock_release(void *lock)
140{
141 Lock *l = (Lock *)lock;
142
143 atomic_add_int(&l->lock, -RC_INCR);
144}
145
146static void
147wlock_release(void *lock)
148{
149 Lock *l = (Lock *)lock;
150
151 atomic_add_int(&l->lock, -WAFLAG);
152 sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
153}
154
155void
156lockdflt_init(LockInfo *li)
157{
158 li->context = NULL;
159 li->context_destroy = NULL;
160 li->lock_create = lock_create;
161 li->lock_destroy = lock_destroy;
162 li->rlock_acquire = rlock_acquire;
163 li->wlock_acquire = wlock_acquire;
164 li->rlock_release = rlock_release;
165 li->wlock_release = wlock_release;
166 /*
167 * Construct a mask to block all signals except traps which might
168 * conceivably be generated within the dynamic linker itself.
169 */
170 sigfillset(&fullsigmask);
171 sigdelset(&fullsigmask, SIGILL);
172 sigdelset(&fullsigmask, SIGTRAP);
173 sigdelset(&fullsigmask, SIGABRT);
174 sigdelset(&fullsigmask, SIGEMT);
175 sigdelset(&fullsigmask, SIGFPE);
176 sigdelset(&fullsigmask, SIGBUS);
177 sigdelset(&fullsigmask, SIGSEGV);
178 sigdelset(&fullsigmask, SIGSYS);
179}