Import libarchive-2.4.17. See NEWS for details.
[dragonfly.git] / contrib / libarchive-2 / libarchive / archive_entry_link_resolver.c
CommitLineData
a44e961d
PA
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
6ed811d1 27__FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/12/30 04:58:21 kientzle Exp $");
a44e961d
PA
28
29#ifdef HAVE_SYS_STAT_H
30#include <sys/stat.h>
31#endif
32#ifdef HAVE_ERRNO_H
33#include <errno.h>
34#endif
35#include <stdio.h>
36#ifdef HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#ifdef HAVE_STRING_H
40#include <string.h>
41#endif
42
43#include "archive_entry.h"
44
6ed811d1
PA
45/*
46 * This is mostly a pretty straightforward hash table implementation.
47 * The only interesting bit is the different strategies used to
48 * match up links. These strategies match those used by various
49 * archiving formats:
50 * tar - content stored with first link, remainder refer back to it.
51 * This requires us to match each subsequent link up with the
52 * first appearance.
53 * cpio - Old cpio just stored body with each link, match-ups were
54 * implicit. This is trivial.
55 * new cpio - New cpio only stores body with last link, match-ups
56 * are implicit. This is actually quite tricky; see the notes
57 * below.
58 */
59
a44e961d
PA
60/* Initial size of link cache. */
61#define links_cache_initial_size 1024
62
6ed811d1
PA
63struct links_entry {
64 struct links_entry *next;
65 struct links_entry *previous;
66 int links; /* # links not yet seen */
67 int hash;
68 struct archive_entry *entry;
69};
70
a44e961d 71struct archive_entry_linkresolver {
6ed811d1
PA
72 struct links_entry **buckets;
73 struct links_entry *spare;
a44e961d
PA
74 unsigned long number_entries;
75 size_t number_buckets;
6ed811d1 76 int strategy;
a44e961d
PA
77};
78
6ed811d1
PA
79static struct links_entry *find_entry(struct archive_entry_linkresolver *,
80 struct archive_entry *);
81static void grow_hash(struct archive_entry_linkresolver *);
82static void insert_entry(struct archive_entry_linkresolver *,
83 struct archive_entry *);
a44e961d
PA
84
85struct archive_entry_linkresolver *
86archive_entry_linkresolver_new(void)
87{
6ed811d1 88 struct archive_entry_linkresolver *res;
a44e961d
PA
89 size_t i;
90
6ed811d1
PA
91 res = malloc(sizeof(struct archive_entry_linkresolver));
92 if (res == NULL)
a44e961d 93 return (NULL);
6ed811d1
PA
94 memset(res, 0, sizeof(struct archive_entry_linkresolver));
95 res->number_buckets = links_cache_initial_size;
96 res->buckets = malloc(res->number_buckets *
97 sizeof(res->buckets[0]));
98 if (res->buckets == NULL) {
99 free(res);
a44e961d
PA
100 return (NULL);
101 }
6ed811d1
PA
102 for (i = 0; i < res->number_buckets; i++)
103 res->buckets[i] = NULL;
104 return (res);
105}
106
107void
108archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
109 int strategy)
110{
111 res->strategy = strategy;
a44e961d
PA
112}
113
114void
6ed811d1 115archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
a44e961d
PA
116{
117 size_t i;
118
6ed811d1 119 if (res->buckets == NULL)
a44e961d
PA
120 return;
121
6ed811d1
PA
122 for (i = 0; i < res->number_buckets; i++) {
123 while (res->buckets[i] != NULL) {
124 struct links_entry *lp = res->buckets[i]->next;
125 archive_entry_free(res->buckets[i]->entry);
126 free(res->buckets[i]);
127 res->buckets[i] = lp;
a44e961d
PA
128 }
129 }
6ed811d1
PA
130 free(res->buckets);
131 res->buckets = NULL;
a44e961d
PA
132}
133
6ed811d1 134/* Always uses tar-like semantics. */
a44e961d 135const char *
6ed811d1 136archive_entry_linkresolve(struct archive_entry_linkresolver *res,
a44e961d
PA
137 struct archive_entry *entry)
138{
6ed811d1 139 struct links_entry *le;
a44e961d 140
6ed811d1
PA
141 /* If it has only one link, then we're done. */
142 if (archive_entry_nlink(entry) == 1)
143 return (NULL);
a44e961d 144
6ed811d1
PA
145 /* Look it up in the hash. */
146 le = find_entry(res, entry);
147 if (le != NULL)
148 return (archive_entry_pathname(le->entry));
149 /* If it's not there, insert it. */
150 insert_entry(res, entry);
151 return (NULL);
152}
a44e961d 153
6ed811d1
PA
154void
155archive_entry_linkify(struct archive_entry_linkresolver *res,
156 struct archive_entry **e, struct archive_entry **f)
157{
158 struct links_entry *le;
159 struct archive_entry *t;
a44e961d 160
6ed811d1 161 *f = NULL; /* Default: Don't return a second entry. */
a44e961d 162
6ed811d1
PA
163 /* If it has only one link, then we're done. */
164 if (archive_entry_nlink(*e) == 1)
165 return;
a44e961d 166
6ed811d1
PA
167 switch (res->strategy) {
168 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
169 le = find_entry(res, *e);
170 if (le != NULL) {
171 archive_entry_set_size(*e, 0);
172 archive_entry_set_hardlink(*e,
173 archive_entry_pathname(le->entry));
174 } else
175 insert_entry(res, *e);
176 return;
177 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
178 /* This one is trivial. */
179 return;
180 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
181 le = find_entry(res, *e);
182 if (le != NULL) {
183 t = *e;
184 *e = le->entry;
185 le->entry = t;
186 archive_entry_set_size(*e, 0);
187 archive_entry_set_hardlink(*e,
188 archive_entry_pathname(le->entry));
189 if (le->links == 0) {
190 *f = le->entry;
a44e961d 191 }
6ed811d1
PA
192 } else {
193 insert_entry(res, *e);
194 *e = NULL;
a44e961d 195 }
6ed811d1
PA
196 return;
197 default:
198 break;
a44e961d 199 }
6ed811d1
PA
200 return;
201}
202
203static struct links_entry *
204find_entry(struct archive_entry_linkresolver *res,
205 struct archive_entry *entry)
206{
207 struct links_entry *le;
208 int hash, bucket;
209 dev_t dev;
210 ino_t ino;
211
212 /* Free a held entry. */
213 if (res->spare != NULL) {
214 archive_entry_free(res->spare->entry);
215 free(res->spare);
216 res->spare = NULL;
217 }
218
219 /* If the links cache overflowed and got flushed, don't bother. */
220 if (res->buckets == NULL)
221 return (NULL);
222
223 dev = archive_entry_dev(entry);
224 ino = archive_entry_ino(entry);
225 hash = dev ^ ino;
a44e961d
PA
226
227 /* Try to locate this entry in the links cache. */
6ed811d1
PA
228 bucket = hash % res->number_buckets;
229 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
230 if (le->hash == hash
231 && dev == archive_entry_dev(le->entry)
232 && ino == archive_entry_ino(le->entry)) {
a44e961d
PA
233 /*
234 * Decrement link count each time and release
235 * the entry if it hits zero. This saves
236 * memory and is necessary for detecting
237 * missed links.
238 */
239 --le->links;
240 if (le->links > 0)
6ed811d1
PA
241 return (le);
242 /* Remove it from this hash bucket. */
a44e961d
PA
243 if (le->previous != NULL)
244 le->previous->next = le->next;
245 if (le->next != NULL)
246 le->next->previous = le->previous;
6ed811d1
PA
247 if (res->buckets[bucket] == le)
248 res->buckets[bucket] = le->next;
249 res->number_entries--;
250 /* Defer freeing this entry. */
251 res->spare = le;
252 return (le);
a44e961d
PA
253 }
254 }
6ed811d1
PA
255 return (NULL);
256}
257
258static void
259insert_entry(struct archive_entry_linkresolver *res,
260 struct archive_entry *entry)
261{
262 struct links_entry *le;
263 int hash, bucket;
a44e961d
PA
264
265 /* Add this entry to the links cache. */
266 le = malloc(sizeof(struct links_entry));
267 if (le == NULL)
6ed811d1
PA
268 return;
269 le->entry = archive_entry_clone(entry);
270 if (le->entry == NULL) {
a44e961d 271 free(le);
6ed811d1 272 return;
a44e961d
PA
273 }
274
6ed811d1
PA
275 /* If the links cache is getting too full, enlarge the hash table. */
276 if (res->number_entries > res->number_buckets * 2)
277 grow_hash(res);
278
279 hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
280 bucket = hash % res->number_buckets;
281
a44e961d 282 /* If we could allocate the entry, record it. */
6ed811d1
PA
283 if (res->buckets[bucket] != NULL)
284 res->buckets[bucket]->previous = le;
285 res->number_entries++;
286 le->next = res->buckets[bucket];
a44e961d 287 le->previous = NULL;
6ed811d1
PA
288 res->buckets[bucket] = le;
289 le->hash = hash;
290 le->links = archive_entry_nlink(entry) - 1;
291}
292
293static void
294grow_hash(struct archive_entry_linkresolver *res)
295{
296 struct links_entry *le, **new_buckets;
297 size_t new_size;
298 size_t i, bucket;
299
300 /* Try to enlarge the bucket list. */
301 new_size = res->number_buckets * 2;
302 new_buckets = malloc(new_size * sizeof(struct links_entry *));
303
304 if (new_buckets != NULL) {
305 memset(new_buckets, 0,
306 new_size * sizeof(struct links_entry *));
307 for (i = 0; i < res->number_buckets; i++) {
308 while (res->buckets[i] != NULL) {
309 /* Remove entry from old bucket. */
310 le = res->buckets[i];
311 res->buckets[i] = le->next;
312
313 /* Add entry to new bucket. */
314 bucket = le->hash % new_size;
315
316 if (new_buckets[bucket] != NULL)
317 new_buckets[bucket]->previous =
318 le;
319 le->next = new_buckets[bucket];
320 le->previous = NULL;
321 new_buckets[bucket] = le;
322 }
323 }
324 free(res->buckets);
325 res->buckets = new_buckets;
326 res->number_buckets = new_size;
327 }
a44e961d 328}