Commit | Line | Data |
---|---|---|
a44e961d PA |
1 | /*- |
2 | * Copyright (c) 2003-2007 Tim Kientzle | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR | |
15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
16 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
17 | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, | |
18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
19 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
20 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
21 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
23 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #include "archive_platform.h" | |
9c82a63e | 27 | __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $"); |
a44e961d PA |
28 | |
29 | #ifdef HAVE_SYS_STAT_H | |
30 | #include <sys/stat.h> | |
31 | #endif | |
32 | #ifdef HAVE_ERRNO_H | |
33 | #include <errno.h> | |
34 | #endif | |
35 | #include <stdio.h> | |
36 | #ifdef HAVE_STDLIB_H | |
37 | #include <stdlib.h> | |
38 | #endif | |
39 | #ifdef HAVE_STRING_H | |
40 | #include <string.h> | |
41 | #endif | |
42 | ||
8bf2d2ac | 43 | #include "archive.h" |
a44e961d PA |
44 | #include "archive_entry.h" |
45 | ||
6ed811d1 PA |
46 | /* |
47 | * This is mostly a pretty straightforward hash table implementation. | |
48 | * The only interesting bit is the different strategies used to | |
49 | * match up links. These strategies match those used by various | |
50 | * archiving formats: | |
51 | * tar - content stored with first link, remainder refer back to it. | |
52 | * This requires us to match each subsequent link up with the | |
53 | * first appearance. | |
54 | * cpio - Old cpio just stored body with each link, match-ups were | |
55 | * implicit. This is trivial. | |
56 | * new cpio - New cpio only stores body with last link, match-ups | |
57 | * are implicit. This is actually quite tricky; see the notes | |
58 | * below. | |
59 | */ | |
60 | ||
8bf2d2ac PA |
61 | /* Users pass us a format code, we translate that into a strategy here. */ |
62 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 | |
63 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 | |
64 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 | |
65 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 | |
66 | ||
a44e961d PA |
67 | /* Initial size of link cache. */ |
68 | #define links_cache_initial_size 1024 | |
69 | ||
6ed811d1 PA |
70 | struct links_entry { |
71 | struct links_entry *next; | |
72 | struct links_entry *previous; | |
8bf2d2ac | 73 | struct archive_entry *canonical; |
6ed811d1 | 74 | struct archive_entry *entry; |
c09f92d2 PA |
75 | size_t hash; |
76 | unsigned int links; /* # links not yet seen */ | |
6ed811d1 PA |
77 | }; |
78 | ||
a44e961d | 79 | struct archive_entry_linkresolver { |
6ed811d1 PA |
80 | struct links_entry **buckets; |
81 | struct links_entry *spare; | |
a44e961d PA |
82 | unsigned long number_entries; |
83 | size_t number_buckets; | |
6ed811d1 | 84 | int strategy; |
a44e961d PA |
85 | }; |
86 | ||
c09f92d2 PA |
87 | #define NEXT_ENTRY_DEFERRED 1 |
88 | #define NEXT_ENTRY_PARTIAL 2 | |
89 | #define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) | |
90 | ||
6ed811d1 PA |
91 | static struct links_entry *find_entry(struct archive_entry_linkresolver *, |
92 | struct archive_entry *); | |
93 | static void grow_hash(struct archive_entry_linkresolver *); | |
8bf2d2ac | 94 | static struct links_entry *insert_entry(struct archive_entry_linkresolver *, |
6ed811d1 | 95 | struct archive_entry *); |
c09f92d2 PA |
96 | static struct links_entry *next_entry(struct archive_entry_linkresolver *, |
97 | int); | |
a44e961d PA |
98 | |
99 | struct archive_entry_linkresolver * | |
100 | archive_entry_linkresolver_new(void) | |
101 | { | |
6ed811d1 | 102 | struct archive_entry_linkresolver *res; |
a44e961d | 103 | |
c09f92d2 PA |
104 | /* Check for positive power-of-two */ |
105 | if (links_cache_initial_size == 0 || | |
106 | (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) | |
107 | return (NULL); | |
108 | ||
109 | res = calloc(1, sizeof(struct archive_entry_linkresolver)); | |
6ed811d1 | 110 | if (res == NULL) |
a44e961d | 111 | return (NULL); |
6ed811d1 | 112 | res->number_buckets = links_cache_initial_size; |
c09f92d2 | 113 | res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); |
6ed811d1 PA |
114 | if (res->buckets == NULL) { |
115 | free(res); | |
a44e961d PA |
116 | return (NULL); |
117 | } | |
6ed811d1 PA |
118 | return (res); |
119 | } | |
120 | ||
121 | void | |
122 | archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, | |
8bf2d2ac | 123 | int fmt) |
6ed811d1 | 124 | { |
8bf2d2ac PA |
125 | int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; |
126 | ||
127 | switch (fmtbase) { | |
c09f92d2 PA |
128 | case ARCHIVE_FORMAT_7ZIP: |
129 | case ARCHIVE_FORMAT_AR: | |
130 | case ARCHIVE_FORMAT_ZIP: | |
131 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; | |
132 | break; | |
8bf2d2ac PA |
133 | case ARCHIVE_FORMAT_CPIO: |
134 | switch (fmt) { | |
135 | case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: | |
136 | case ARCHIVE_FORMAT_CPIO_SVR4_CRC: | |
137 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; | |
138 | break; | |
139 | default: | |
140 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; | |
141 | break; | |
142 | } | |
143 | break; | |
144 | case ARCHIVE_FORMAT_MTREE: | |
145 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; | |
146 | break; | |
c09f92d2 PA |
147 | case ARCHIVE_FORMAT_ISO9660: |
148 | case ARCHIVE_FORMAT_SHAR: | |
8bf2d2ac | 149 | case ARCHIVE_FORMAT_TAR: |
c09f92d2 | 150 | case ARCHIVE_FORMAT_XAR: |
8bf2d2ac PA |
151 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; |
152 | break; | |
153 | default: | |
c09f92d2 | 154 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; |
8bf2d2ac PA |
155 | break; |
156 | } | |
a44e961d PA |
157 | } |
158 | ||
159 | void | |
6ed811d1 | 160 | archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) |
a44e961d | 161 | { |
8bf2d2ac | 162 | struct links_entry *le; |
a44e961d | 163 | |
8bf2d2ac | 164 | if (res == NULL) |
a44e961d PA |
165 | return; |
166 | ||
c09f92d2 PA |
167 | while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) |
168 | archive_entry_free(le->entry); | |
169 | free(res->buckets); | |
8bf2d2ac | 170 | free(res); |
6ed811d1 | 171 | } |
a44e961d | 172 | |
6ed811d1 PA |
173 | void |
174 | archive_entry_linkify(struct archive_entry_linkresolver *res, | |
175 | struct archive_entry **e, struct archive_entry **f) | |
176 | { | |
177 | struct links_entry *le; | |
178 | struct archive_entry *t; | |
a44e961d | 179 | |
6ed811d1 | 180 | *f = NULL; /* Default: Don't return a second entry. */ |
a44e961d | 181 | |
8bf2d2ac | 182 | if (*e == NULL) { |
c09f92d2 | 183 | le = next_entry(res, NEXT_ENTRY_DEFERRED); |
8bf2d2ac PA |
184 | if (le != NULL) { |
185 | *e = le->entry; | |
186 | le->entry = NULL; | |
187 | } | |
188 | return; | |
189 | } | |
190 | ||
6ed811d1 PA |
191 | /* If it has only one link, then we're done. */ |
192 | if (archive_entry_nlink(*e) == 1) | |
193 | return; | |
9c82a63e PA |
194 | /* Directories, devices never have hardlinks. */ |
195 | if (archive_entry_filetype(*e) == AE_IFDIR | |
196 | || archive_entry_filetype(*e) == AE_IFBLK | |
197 | || archive_entry_filetype(*e) == AE_IFCHR) | |
08895e67 | 198 | return; |
a44e961d | 199 | |
6ed811d1 PA |
200 | switch (res->strategy) { |
201 | case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: | |
202 | le = find_entry(res, *e); | |
203 | if (le != NULL) { | |
8029ab02 | 204 | archive_entry_unset_size(*e); |
8bf2d2ac PA |
205 | archive_entry_copy_hardlink(*e, |
206 | archive_entry_pathname(le->canonical)); | |
207 | } else | |
208 | insert_entry(res, *e); | |
209 | return; | |
210 | case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: | |
211 | le = find_entry(res, *e); | |
212 | if (le != NULL) { | |
213 | archive_entry_copy_hardlink(*e, | |
214 | archive_entry_pathname(le->canonical)); | |
6ed811d1 PA |
215 | } else |
216 | insert_entry(res, *e); | |
217 | return; | |
218 | case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: | |
219 | /* This one is trivial. */ | |
220 | return; | |
221 | case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: | |
222 | le = find_entry(res, *e); | |
223 | if (le != NULL) { | |
8bf2d2ac PA |
224 | /* |
225 | * Put the new entry in le, return the | |
226 | * old entry from le. | |
227 | */ | |
6ed811d1 PA |
228 | t = *e; |
229 | *e = le->entry; | |
230 | le->entry = t; | |
8bf2d2ac | 231 | /* Make the old entry into a hardlink. */ |
8029ab02 | 232 | archive_entry_unset_size(*e); |
8bf2d2ac PA |
233 | archive_entry_copy_hardlink(*e, |
234 | archive_entry_pathname(le->canonical)); | |
235 | /* If we ran out of links, return the | |
236 | * final entry as well. */ | |
6ed811d1 PA |
237 | if (le->links == 0) { |
238 | *f = le->entry; | |
8bf2d2ac | 239 | le->entry = NULL; |
a44e961d | 240 | } |
6ed811d1 | 241 | } else { |
8bf2d2ac PA |
242 | /* |
243 | * If we haven't seen it, tuck it away | |
244 | * for future use. | |
245 | */ | |
246 | le = insert_entry(res, *e); | |
d4d8193e PA |
247 | if (le == NULL) |
248 | /* XXX We should return an error code XXX */ | |
249 | return; | |
8bf2d2ac | 250 | le->entry = *e; |
6ed811d1 | 251 | *e = NULL; |
a44e961d | 252 | } |
6ed811d1 PA |
253 | return; |
254 | default: | |
255 | break; | |
a44e961d | 256 | } |
6ed811d1 PA |
257 | return; |
258 | } | |
259 | ||
260 | static struct links_entry * | |
261 | find_entry(struct archive_entry_linkresolver *res, | |
262 | struct archive_entry *entry) | |
263 | { | |
264 | struct links_entry *le; | |
c09f92d2 | 265 | size_t hash, bucket; |
6ed811d1 | 266 | dev_t dev; |
9c82a63e | 267 | int64_t ino; |
6ed811d1 PA |
268 | |
269 | /* Free a held entry. */ | |
270 | if (res->spare != NULL) { | |
8bf2d2ac | 271 | archive_entry_free(res->spare->canonical); |
6ed811d1 PA |
272 | archive_entry_free(res->spare->entry); |
273 | free(res->spare); | |
274 | res->spare = NULL; | |
275 | } | |
276 | ||
6ed811d1 | 277 | dev = archive_entry_dev(entry); |
9c82a63e | 278 | ino = archive_entry_ino64(entry); |
c09f92d2 | 279 | hash = (size_t)(dev ^ ino); |
a44e961d PA |
280 | |
281 | /* Try to locate this entry in the links cache. */ | |
c09f92d2 | 282 | bucket = hash & (res->number_buckets - 1); |
6ed811d1 PA |
283 | for (le = res->buckets[bucket]; le != NULL; le = le->next) { |
284 | if (le->hash == hash | |
8bf2d2ac | 285 | && dev == archive_entry_dev(le->canonical) |
9c82a63e | 286 | && ino == archive_entry_ino64(le->canonical)) { |
a44e961d PA |
287 | /* |
288 | * Decrement link count each time and release | |
289 | * the entry if it hits zero. This saves | |
290 | * memory and is necessary for detecting | |
291 | * missed links. | |
292 | */ | |
293 | --le->links; | |
294 | if (le->links > 0) | |
6ed811d1 PA |
295 | return (le); |
296 | /* Remove it from this hash bucket. */ | |
a44e961d PA |
297 | if (le->previous != NULL) |
298 | le->previous->next = le->next; | |
299 | if (le->next != NULL) | |
300 | le->next->previous = le->previous; | |
6ed811d1 PA |
301 | if (res->buckets[bucket] == le) |
302 | res->buckets[bucket] = le->next; | |
303 | res->number_entries--; | |
304 | /* Defer freeing this entry. */ | |
305 | res->spare = le; | |
306 | return (le); | |
a44e961d PA |
307 | } |
308 | } | |
6ed811d1 PA |
309 | return (NULL); |
310 | } | |
311 | ||
8bf2d2ac | 312 | static struct links_entry * |
c09f92d2 | 313 | next_entry(struct archive_entry_linkresolver *res, int mode) |
8bf2d2ac PA |
314 | { |
315 | struct links_entry *le; | |
316 | size_t bucket; | |
317 | ||
318 | /* Free a held entry. */ | |
319 | if (res->spare != NULL) { | |
320 | archive_entry_free(res->spare->canonical); | |
c09f92d2 | 321 | archive_entry_free(res->spare->entry); |
8bf2d2ac PA |
322 | free(res->spare); |
323 | res->spare = NULL; | |
324 | } | |
325 | ||
8bf2d2ac PA |
326 | /* Look for next non-empty bucket in the links cache. */ |
327 | for (bucket = 0; bucket < res->number_buckets; bucket++) { | |
c09f92d2 PA |
328 | for (le = res->buckets[bucket]; le != NULL; le = le->next) { |
329 | if (le->entry != NULL && | |
330 | (mode & NEXT_ENTRY_DEFERRED) == 0) | |
331 | continue; | |
332 | if (le->entry == NULL && | |
333 | (mode & NEXT_ENTRY_PARTIAL) == 0) | |
334 | continue; | |
8bf2d2ac PA |
335 | /* Remove it from this hash bucket. */ |
336 | if (le->next != NULL) | |
337 | le->next->previous = le->previous; | |
c09f92d2 PA |
338 | if (le->previous != NULL) |
339 | le->previous->next = le->next; | |
340 | else | |
341 | res->buckets[bucket] = le->next; | |
8bf2d2ac PA |
342 | res->number_entries--; |
343 | /* Defer freeing this entry. */ | |
344 | res->spare = le; | |
345 | return (le); | |
346 | } | |
347 | } | |
348 | return (NULL); | |
349 | } | |
350 | ||
351 | static struct links_entry * | |
6ed811d1 PA |
352 | insert_entry(struct archive_entry_linkresolver *res, |
353 | struct archive_entry *entry) | |
354 | { | |
355 | struct links_entry *le; | |
c09f92d2 | 356 | size_t hash, bucket; |
a44e961d PA |
357 | |
358 | /* Add this entry to the links cache. */ | |
c09f92d2 | 359 | le = calloc(1, sizeof(struct links_entry)); |
a44e961d | 360 | if (le == NULL) |
8bf2d2ac | 361 | return (NULL); |
8bf2d2ac | 362 | le->canonical = archive_entry_clone(entry); |
a44e961d | 363 | |
6ed811d1 PA |
364 | /* If the links cache is getting too full, enlarge the hash table. */ |
365 | if (res->number_entries > res->number_buckets * 2) | |
366 | grow_hash(res); | |
367 | ||
59bf7050 | 368 | hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); |
c09f92d2 | 369 | bucket = hash & (res->number_buckets - 1); |
6ed811d1 | 370 | |
a44e961d | 371 | /* If we could allocate the entry, record it. */ |
6ed811d1 PA |
372 | if (res->buckets[bucket] != NULL) |
373 | res->buckets[bucket]->previous = le; | |
374 | res->number_entries++; | |
375 | le->next = res->buckets[bucket]; | |
a44e961d | 376 | le->previous = NULL; |
6ed811d1 PA |
377 | res->buckets[bucket] = le; |
378 | le->hash = hash; | |
379 | le->links = archive_entry_nlink(entry) - 1; | |
8bf2d2ac | 380 | return (le); |
6ed811d1 PA |
381 | } |
382 | ||
383 | static void | |
384 | grow_hash(struct archive_entry_linkresolver *res) | |
385 | { | |
386 | struct links_entry *le, **new_buckets; | |
387 | size_t new_size; | |
388 | size_t i, bucket; | |
389 | ||
390 | /* Try to enlarge the bucket list. */ | |
391 | new_size = res->number_buckets * 2; | |
c09f92d2 PA |
392 | if (new_size < res->number_buckets) |
393 | return; | |
394 | new_buckets = calloc(new_size, sizeof(struct links_entry *)); | |
395 | ||
396 | if (new_buckets == NULL) | |
397 | return; | |
398 | ||
399 | for (i = 0; i < res->number_buckets; i++) { | |
400 | while (res->buckets[i] != NULL) { | |
401 | /* Remove entry from old bucket. */ | |
402 | le = res->buckets[i]; | |
403 | res->buckets[i] = le->next; | |
404 | ||
405 | /* Add entry to new bucket. */ | |
406 | bucket = le->hash & (new_size - 1); | |
407 | ||
408 | if (new_buckets[bucket] != NULL) | |
409 | new_buckets[bucket]->previous = le; | |
410 | le->next = new_buckets[bucket]; | |
411 | le->previous = NULL; | |
412 | new_buckets[bucket] = le; | |
6ed811d1 | 413 | } |
6ed811d1 | 414 | } |
c09f92d2 PA |
415 | free(res->buckets); |
416 | res->buckets = new_buckets; | |
417 | res->number_buckets = new_size; | |
418 | } | |
419 | ||
420 | struct archive_entry * | |
421 | archive_entry_partial_links(struct archive_entry_linkresolver *res, | |
422 | unsigned int *links) | |
423 | { | |
424 | struct archive_entry *e; | |
425 | struct links_entry *le; | |
426 | ||
427 | /* Free a held entry. */ | |
428 | if (res->spare != NULL) { | |
429 | archive_entry_free(res->spare->canonical); | |
430 | archive_entry_free(res->spare->entry); | |
431 | free(res->spare); | |
432 | res->spare = NULL; | |
433 | } | |
434 | ||
435 | le = next_entry(res, NEXT_ENTRY_PARTIAL); | |
436 | if (le != NULL) { | |
437 | e = le->canonical; | |
438 | if (links != NULL) | |
439 | *links = le->links; | |
440 | le->canonical = NULL; | |
441 | } else { | |
442 | e = NULL; | |
443 | if (links != NULL) | |
444 | *links = 0; | |
445 | } | |
446 | return (e); | |
a44e961d | 447 | } |