Import libarchive-2.8.5.
[dragonfly.git] / contrib / libarchive / libarchive / archive_entry_link_resolver.c
CommitLineData
a44e961d
PA
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
9c82a63e 27__FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $");
a44e961d
PA
28
29#ifdef HAVE_SYS_STAT_H
30#include <sys/stat.h>
31#endif
32#ifdef HAVE_ERRNO_H
33#include <errno.h>
34#endif
35#include <stdio.h>
36#ifdef HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#ifdef HAVE_STRING_H
40#include <string.h>
41#endif
42
8bf2d2ac 43#include "archive.h"
a44e961d
PA
44#include "archive_entry.h"
45
6ed811d1
PA
46/*
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
50 * archiving formats:
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
53 * first appearance.
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
58 * below.
59 */
60
8bf2d2ac
PA
61/* Users pass us a format code, we translate that into a strategy here. */
62#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
63#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
66
a44e961d
PA
67/* Initial size of link cache. */
68#define links_cache_initial_size 1024
69
6ed811d1
PA
70struct links_entry {
71 struct links_entry *next;
72 struct links_entry *previous;
73 int links; /* # links not yet seen */
74 int hash;
8bf2d2ac 75 struct archive_entry *canonical;
6ed811d1
PA
76 struct archive_entry *entry;
77};
78
a44e961d 79struct archive_entry_linkresolver {
6ed811d1
PA
80 struct links_entry **buckets;
81 struct links_entry *spare;
a44e961d
PA
82 unsigned long number_entries;
83 size_t number_buckets;
6ed811d1 84 int strategy;
a44e961d
PA
85};
86
6ed811d1
PA
87static struct links_entry *find_entry(struct archive_entry_linkresolver *,
88 struct archive_entry *);
89static void grow_hash(struct archive_entry_linkresolver *);
8bf2d2ac 90static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
6ed811d1 91 struct archive_entry *);
8bf2d2ac 92static struct links_entry *next_entry(struct archive_entry_linkresolver *);
a44e961d
PA
93
94struct archive_entry_linkresolver *
95archive_entry_linkresolver_new(void)
96{
6ed811d1 97 struct archive_entry_linkresolver *res;
a44e961d
PA
98 size_t i;
99
6ed811d1
PA
100 res = malloc(sizeof(struct archive_entry_linkresolver));
101 if (res == NULL)
a44e961d 102 return (NULL);
6ed811d1
PA
103 memset(res, 0, sizeof(struct archive_entry_linkresolver));
104 res->number_buckets = links_cache_initial_size;
105 res->buckets = malloc(res->number_buckets *
106 sizeof(res->buckets[0]));
107 if (res->buckets == NULL) {
108 free(res);
a44e961d
PA
109 return (NULL);
110 }
6ed811d1
PA
111 for (i = 0; i < res->number_buckets; i++)
112 res->buckets[i] = NULL;
113 return (res);
114}
115
116void
117archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
8bf2d2ac 118 int fmt)
6ed811d1 119{
8bf2d2ac
PA
120 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
121
122 switch (fmtbase) {
123 case ARCHIVE_FORMAT_CPIO:
124 switch (fmt) {
125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
127 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
128 break;
129 default:
130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131 break;
132 }
133 break;
134 case ARCHIVE_FORMAT_MTREE:
135 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
136 break;
137 case ARCHIVE_FORMAT_TAR:
138 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
139 break;
140 default:
141 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
142 break;
143 }
a44e961d
PA
144}
145
146void
6ed811d1 147archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
a44e961d 148{
8bf2d2ac 149 struct links_entry *le;
a44e961d 150
8bf2d2ac 151 if (res == NULL)
a44e961d
PA
152 return;
153
8bf2d2ac
PA
154 if (res->buckets != NULL) {
155 while ((le = next_entry(res)) != NULL)
156 archive_entry_free(le->entry);
157 free(res->buckets);
158 res->buckets = NULL;
a44e961d 159 }
8bf2d2ac 160 free(res);
6ed811d1 161}
a44e961d 162
6ed811d1
PA
163void
164archive_entry_linkify(struct archive_entry_linkresolver *res,
165 struct archive_entry **e, struct archive_entry **f)
166{
167 struct links_entry *le;
168 struct archive_entry *t;
a44e961d 169
6ed811d1 170 *f = NULL; /* Default: Don't return a second entry. */
a44e961d 171
8bf2d2ac
PA
172 if (*e == NULL) {
173 le = next_entry(res);
174 if (le != NULL) {
175 *e = le->entry;
176 le->entry = NULL;
177 }
178 return;
179 }
180
6ed811d1
PA
181 /* If it has only one link, then we're done. */
182 if (archive_entry_nlink(*e) == 1)
183 return;
9c82a63e
PA
184 /* Directories, devices never have hardlinks. */
185 if (archive_entry_filetype(*e) == AE_IFDIR
186 || archive_entry_filetype(*e) == AE_IFBLK
187 || archive_entry_filetype(*e) == AE_IFCHR)
08895e67 188 return;
a44e961d 189
6ed811d1
PA
190 switch (res->strategy) {
191 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
192 le = find_entry(res, *e);
193 if (le != NULL) {
8029ab02 194 archive_entry_unset_size(*e);
8bf2d2ac
PA
195 archive_entry_copy_hardlink(*e,
196 archive_entry_pathname(le->canonical));
197 } else
198 insert_entry(res, *e);
199 return;
200 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
201 le = find_entry(res, *e);
202 if (le != NULL) {
203 archive_entry_copy_hardlink(*e,
204 archive_entry_pathname(le->canonical));
6ed811d1
PA
205 } else
206 insert_entry(res, *e);
207 return;
208 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
209 /* This one is trivial. */
210 return;
211 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
212 le = find_entry(res, *e);
213 if (le != NULL) {
8bf2d2ac
PA
214 /*
215 * Put the new entry in le, return the
216 * old entry from le.
217 */
6ed811d1
PA
218 t = *e;
219 *e = le->entry;
220 le->entry = t;
8bf2d2ac 221 /* Make the old entry into a hardlink. */
8029ab02 222 archive_entry_unset_size(*e);
8bf2d2ac
PA
223 archive_entry_copy_hardlink(*e,
224 archive_entry_pathname(le->canonical));
225 /* If we ran out of links, return the
226 * final entry as well. */
6ed811d1
PA
227 if (le->links == 0) {
228 *f = le->entry;
8bf2d2ac 229 le->entry = NULL;
a44e961d 230 }
6ed811d1 231 } else {
8bf2d2ac
PA
232 /*
233 * If we haven't seen it, tuck it away
234 * for future use.
235 */
236 le = insert_entry(res, *e);
237 le->entry = *e;
6ed811d1 238 *e = NULL;
a44e961d 239 }
6ed811d1
PA
240 return;
241 default:
242 break;
a44e961d 243 }
6ed811d1
PA
244 return;
245}
246
247static struct links_entry *
248find_entry(struct archive_entry_linkresolver *res,
249 struct archive_entry *entry)
250{
251 struct links_entry *le;
252 int hash, bucket;
253 dev_t dev;
9c82a63e 254 int64_t ino;
6ed811d1
PA
255
256 /* Free a held entry. */
257 if (res->spare != NULL) {
8bf2d2ac 258 archive_entry_free(res->spare->canonical);
6ed811d1
PA
259 archive_entry_free(res->spare->entry);
260 free(res->spare);
261 res->spare = NULL;
262 }
263
264 /* If the links cache overflowed and got flushed, don't bother. */
265 if (res->buckets == NULL)
266 return (NULL);
267
268 dev = archive_entry_dev(entry);
9c82a63e
PA
269 ino = archive_entry_ino64(entry);
270 hash = (int)(dev ^ ino);
a44e961d
PA
271
272 /* Try to locate this entry in the links cache. */
6ed811d1
PA
273 bucket = hash % res->number_buckets;
274 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
275 if (le->hash == hash
8bf2d2ac 276 && dev == archive_entry_dev(le->canonical)
9c82a63e 277 && ino == archive_entry_ino64(le->canonical)) {
a44e961d
PA
278 /*
279 * Decrement link count each time and release
280 * the entry if it hits zero. This saves
281 * memory and is necessary for detecting
282 * missed links.
283 */
284 --le->links;
285 if (le->links > 0)
6ed811d1
PA
286 return (le);
287 /* Remove it from this hash bucket. */
a44e961d
PA
288 if (le->previous != NULL)
289 le->previous->next = le->next;
290 if (le->next != NULL)
291 le->next->previous = le->previous;
6ed811d1
PA
292 if (res->buckets[bucket] == le)
293 res->buckets[bucket] = le->next;
294 res->number_entries--;
295 /* Defer freeing this entry. */
296 res->spare = le;
297 return (le);
a44e961d
PA
298 }
299 }
6ed811d1
PA
300 return (NULL);
301}
302
8bf2d2ac
PA
303static struct links_entry *
304next_entry(struct archive_entry_linkresolver *res)
305{
306 struct links_entry *le;
307 size_t bucket;
308
309 /* Free a held entry. */
310 if (res->spare != NULL) {
311 archive_entry_free(res->spare->canonical);
312 free(res->spare);
313 res->spare = NULL;
314 }
315
316 /* If the links cache overflowed and got flushed, don't bother. */
317 if (res->buckets == NULL)
318 return (NULL);
319
320 /* Look for next non-empty bucket in the links cache. */
321 for (bucket = 0; bucket < res->number_buckets; bucket++) {
322 le = res->buckets[bucket];
323 if (le != NULL) {
324 /* Remove it from this hash bucket. */
325 if (le->next != NULL)
326 le->next->previous = le->previous;
327 res->buckets[bucket] = le->next;
328 res->number_entries--;
329 /* Defer freeing this entry. */
330 res->spare = le;
331 return (le);
332 }
333 }
334 return (NULL);
335}
336
337static struct links_entry *
6ed811d1
PA
338insert_entry(struct archive_entry_linkresolver *res,
339 struct archive_entry *entry)
340{
341 struct links_entry *le;
342 int hash, bucket;
a44e961d
PA
343
344 /* Add this entry to the links cache. */
345 le = malloc(sizeof(struct links_entry));
346 if (le == NULL)
8bf2d2ac
PA
347 return (NULL);
348 memset(le, 0, sizeof(*le));
349 le->canonical = archive_entry_clone(entry);
a44e961d 350
6ed811d1
PA
351 /* If the links cache is getting too full, enlarge the hash table. */
352 if (res->number_entries > res->number_buckets * 2)
353 grow_hash(res);
354
9c82a63e 355 hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
6ed811d1
PA
356 bucket = hash % res->number_buckets;
357
a44e961d 358 /* If we could allocate the entry, record it. */
6ed811d1
PA
359 if (res->buckets[bucket] != NULL)
360 res->buckets[bucket]->previous = le;
361 res->number_entries++;
362 le->next = res->buckets[bucket];
a44e961d 363 le->previous = NULL;
6ed811d1
PA
364 res->buckets[bucket] = le;
365 le->hash = hash;
366 le->links = archive_entry_nlink(entry) - 1;
8bf2d2ac 367 return (le);
6ed811d1
PA
368}
369
370static void
371grow_hash(struct archive_entry_linkresolver *res)
372{
373 struct links_entry *le, **new_buckets;
374 size_t new_size;
375 size_t i, bucket;
376
377 /* Try to enlarge the bucket list. */
378 new_size = res->number_buckets * 2;
379 new_buckets = malloc(new_size * sizeof(struct links_entry *));
380
381 if (new_buckets != NULL) {
382 memset(new_buckets, 0,
383 new_size * sizeof(struct links_entry *));
384 for (i = 0; i < res->number_buckets; i++) {
385 while (res->buckets[i] != NULL) {
386 /* Remove entry from old bucket. */
387 le = res->buckets[i];
388 res->buckets[i] = le->next;
389
390 /* Add entry to new bucket. */
391 bucket = le->hash % new_size;
392
393 if (new_buckets[bucket] != NULL)
394 new_buckets[bucket]->previous =
395 le;
396 le->next = new_buckets[bucket];
397 le->previous = NULL;
398 new_buckets[bucket] = le;
399 }
400 }
401 free(res->buckets);
402 res->buckets = new_buckets;
403 res->number_buckets = new_size;
404 }
a44e961d 405}