Commit | Line | Data |
---|---|---|
a44e961d PA |
1 | /*- |
2 | * Copyright (c) 2003-2007 Tim Kientzle | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR | |
15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
16 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
17 | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, | |
18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
19 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
20 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
21 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
23 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #include "archive_platform.h" | |
9c82a63e | 27 | __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $"); |
a44e961d PA |
28 | |
29 | #ifdef HAVE_SYS_STAT_H | |
30 | #include <sys/stat.h> | |
31 | #endif | |
32 | #ifdef HAVE_ERRNO_H | |
33 | #include <errno.h> | |
34 | #endif | |
35 | #include <stdio.h> | |
36 | #ifdef HAVE_STDLIB_H | |
37 | #include <stdlib.h> | |
38 | #endif | |
39 | #ifdef HAVE_STRING_H | |
40 | #include <string.h> | |
41 | #endif | |
42 | ||
8bf2d2ac | 43 | #include "archive.h" |
a44e961d PA |
44 | #include "archive_entry.h" |
45 | ||
6ed811d1 PA |
46 | /* |
47 | * This is mostly a pretty straightforward hash table implementation. | |
48 | * The only interesting bit is the different strategies used to | |
49 | * match up links. These strategies match those used by various | |
50 | * archiving formats: | |
51 | * tar - content stored with first link, remainder refer back to it. | |
52 | * This requires us to match each subsequent link up with the | |
53 | * first appearance. | |
54 | * cpio - Old cpio just stored body with each link, match-ups were | |
55 | * implicit. This is trivial. | |
56 | * new cpio - New cpio only stores body with last link, match-ups | |
57 | * are implicit. This is actually quite tricky; see the notes | |
58 | * below. | |
59 | */ | |
60 | ||
8bf2d2ac PA |
61 | /* Users pass us a format code, we translate that into a strategy here. */ |
62 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 | |
63 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 | |
64 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 | |
65 | #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 | |
66 | ||
a44e961d PA |
67 | /* Initial size of link cache. */ |
68 | #define links_cache_initial_size 1024 | |
69 | ||
6ed811d1 PA |
70 | struct links_entry { |
71 | struct links_entry *next; | |
72 | struct links_entry *previous; | |
73 | int links; /* # links not yet seen */ | |
74 | int hash; | |
8bf2d2ac | 75 | struct archive_entry *canonical; |
6ed811d1 PA |
76 | struct archive_entry *entry; |
77 | }; | |
78 | ||
a44e961d | 79 | struct archive_entry_linkresolver { |
6ed811d1 PA |
80 | struct links_entry **buckets; |
81 | struct links_entry *spare; | |
a44e961d PA |
82 | unsigned long number_entries; |
83 | size_t number_buckets; | |
6ed811d1 | 84 | int strategy; |
a44e961d PA |
85 | }; |
86 | ||
6ed811d1 PA |
87 | static struct links_entry *find_entry(struct archive_entry_linkresolver *, |
88 | struct archive_entry *); | |
89 | static void grow_hash(struct archive_entry_linkresolver *); | |
8bf2d2ac | 90 | static struct links_entry *insert_entry(struct archive_entry_linkresolver *, |
6ed811d1 | 91 | struct archive_entry *); |
8bf2d2ac | 92 | static struct links_entry *next_entry(struct archive_entry_linkresolver *); |
a44e961d PA |
93 | |
94 | struct archive_entry_linkresolver * | |
95 | archive_entry_linkresolver_new(void) | |
96 | { | |
6ed811d1 | 97 | struct archive_entry_linkresolver *res; |
a44e961d PA |
98 | size_t i; |
99 | ||
6ed811d1 PA |
100 | res = malloc(sizeof(struct archive_entry_linkresolver)); |
101 | if (res == NULL) | |
a44e961d | 102 | return (NULL); |
6ed811d1 PA |
103 | memset(res, 0, sizeof(struct archive_entry_linkresolver)); |
104 | res->number_buckets = links_cache_initial_size; | |
105 | res->buckets = malloc(res->number_buckets * | |
106 | sizeof(res->buckets[0])); | |
107 | if (res->buckets == NULL) { | |
108 | free(res); | |
a44e961d PA |
109 | return (NULL); |
110 | } | |
6ed811d1 PA |
111 | for (i = 0; i < res->number_buckets; i++) |
112 | res->buckets[i] = NULL; | |
113 | return (res); | |
114 | } | |
115 | ||
116 | void | |
117 | archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, | |
8bf2d2ac | 118 | int fmt) |
6ed811d1 | 119 | { |
8bf2d2ac PA |
120 | int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; |
121 | ||
122 | switch (fmtbase) { | |
123 | case ARCHIVE_FORMAT_CPIO: | |
124 | switch (fmt) { | |
125 | case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: | |
126 | case ARCHIVE_FORMAT_CPIO_SVR4_CRC: | |
127 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; | |
128 | break; | |
129 | default: | |
130 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; | |
131 | break; | |
132 | } | |
133 | break; | |
134 | case ARCHIVE_FORMAT_MTREE: | |
135 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; | |
136 | break; | |
137 | case ARCHIVE_FORMAT_TAR: | |
138 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; | |
139 | break; | |
140 | default: | |
141 | res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; | |
142 | break; | |
143 | } | |
a44e961d PA |
144 | } |
145 | ||
146 | void | |
6ed811d1 | 147 | archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) |
a44e961d | 148 | { |
8bf2d2ac | 149 | struct links_entry *le; |
a44e961d | 150 | |
8bf2d2ac | 151 | if (res == NULL) |
a44e961d PA |
152 | return; |
153 | ||
8bf2d2ac PA |
154 | if (res->buckets != NULL) { |
155 | while ((le = next_entry(res)) != NULL) | |
156 | archive_entry_free(le->entry); | |
157 | free(res->buckets); | |
158 | res->buckets = NULL; | |
a44e961d | 159 | } |
8bf2d2ac | 160 | free(res); |
6ed811d1 | 161 | } |
a44e961d | 162 | |
6ed811d1 PA |
163 | void |
164 | archive_entry_linkify(struct archive_entry_linkresolver *res, | |
165 | struct archive_entry **e, struct archive_entry **f) | |
166 | { | |
167 | struct links_entry *le; | |
168 | struct archive_entry *t; | |
a44e961d | 169 | |
6ed811d1 | 170 | *f = NULL; /* Default: Don't return a second entry. */ |
a44e961d | 171 | |
8bf2d2ac PA |
172 | if (*e == NULL) { |
173 | le = next_entry(res); | |
174 | if (le != NULL) { | |
175 | *e = le->entry; | |
176 | le->entry = NULL; | |
177 | } | |
178 | return; | |
179 | } | |
180 | ||
6ed811d1 PA |
181 | /* If it has only one link, then we're done. */ |
182 | if (archive_entry_nlink(*e) == 1) | |
183 | return; | |
9c82a63e PA |
184 | /* Directories, devices never have hardlinks. */ |
185 | if (archive_entry_filetype(*e) == AE_IFDIR | |
186 | || archive_entry_filetype(*e) == AE_IFBLK | |
187 | || archive_entry_filetype(*e) == AE_IFCHR) | |
08895e67 | 188 | return; |
a44e961d | 189 | |
6ed811d1 PA |
190 | switch (res->strategy) { |
191 | case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: | |
192 | le = find_entry(res, *e); | |
193 | if (le != NULL) { | |
8029ab02 | 194 | archive_entry_unset_size(*e); |
8bf2d2ac PA |
195 | archive_entry_copy_hardlink(*e, |
196 | archive_entry_pathname(le->canonical)); | |
197 | } else | |
198 | insert_entry(res, *e); | |
199 | return; | |
200 | case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: | |
201 | le = find_entry(res, *e); | |
202 | if (le != NULL) { | |
203 | archive_entry_copy_hardlink(*e, | |
204 | archive_entry_pathname(le->canonical)); | |
6ed811d1 PA |
205 | } else |
206 | insert_entry(res, *e); | |
207 | return; | |
208 | case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: | |
209 | /* This one is trivial. */ | |
210 | return; | |
211 | case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: | |
212 | le = find_entry(res, *e); | |
213 | if (le != NULL) { | |
8bf2d2ac PA |
214 | /* |
215 | * Put the new entry in le, return the | |
216 | * old entry from le. | |
217 | */ | |
6ed811d1 PA |
218 | t = *e; |
219 | *e = le->entry; | |
220 | le->entry = t; | |
8bf2d2ac | 221 | /* Make the old entry into a hardlink. */ |
8029ab02 | 222 | archive_entry_unset_size(*e); |
8bf2d2ac PA |
223 | archive_entry_copy_hardlink(*e, |
224 | archive_entry_pathname(le->canonical)); | |
225 | /* If we ran out of links, return the | |
226 | * final entry as well. */ | |
6ed811d1 PA |
227 | if (le->links == 0) { |
228 | *f = le->entry; | |
8bf2d2ac | 229 | le->entry = NULL; |
a44e961d | 230 | } |
6ed811d1 | 231 | } else { |
8bf2d2ac PA |
232 | /* |
233 | * If we haven't seen it, tuck it away | |
234 | * for future use. | |
235 | */ | |
236 | le = insert_entry(res, *e); | |
237 | le->entry = *e; | |
6ed811d1 | 238 | *e = NULL; |
a44e961d | 239 | } |
6ed811d1 PA |
240 | return; |
241 | default: | |
242 | break; | |
a44e961d | 243 | } |
6ed811d1 PA |
244 | return; |
245 | } | |
246 | ||
247 | static struct links_entry * | |
248 | find_entry(struct archive_entry_linkresolver *res, | |
249 | struct archive_entry *entry) | |
250 | { | |
251 | struct links_entry *le; | |
252 | int hash, bucket; | |
253 | dev_t dev; | |
9c82a63e | 254 | int64_t ino; |
6ed811d1 PA |
255 | |
256 | /* Free a held entry. */ | |
257 | if (res->spare != NULL) { | |
8bf2d2ac | 258 | archive_entry_free(res->spare->canonical); |
6ed811d1 PA |
259 | archive_entry_free(res->spare->entry); |
260 | free(res->spare); | |
261 | res->spare = NULL; | |
262 | } | |
263 | ||
264 | /* If the links cache overflowed and got flushed, don't bother. */ | |
265 | if (res->buckets == NULL) | |
266 | return (NULL); | |
267 | ||
268 | dev = archive_entry_dev(entry); | |
9c82a63e PA |
269 | ino = archive_entry_ino64(entry); |
270 | hash = (int)(dev ^ ino); | |
a44e961d PA |
271 | |
272 | /* Try to locate this entry in the links cache. */ | |
6ed811d1 PA |
273 | bucket = hash % res->number_buckets; |
274 | for (le = res->buckets[bucket]; le != NULL; le = le->next) { | |
275 | if (le->hash == hash | |
8bf2d2ac | 276 | && dev == archive_entry_dev(le->canonical) |
9c82a63e | 277 | && ino == archive_entry_ino64(le->canonical)) { |
a44e961d PA |
278 | /* |
279 | * Decrement link count each time and release | |
280 | * the entry if it hits zero. This saves | |
281 | * memory and is necessary for detecting | |
282 | * missed links. | |
283 | */ | |
284 | --le->links; | |
285 | if (le->links > 0) | |
6ed811d1 PA |
286 | return (le); |
287 | /* Remove it from this hash bucket. */ | |
a44e961d PA |
288 | if (le->previous != NULL) |
289 | le->previous->next = le->next; | |
290 | if (le->next != NULL) | |
291 | le->next->previous = le->previous; | |
6ed811d1 PA |
292 | if (res->buckets[bucket] == le) |
293 | res->buckets[bucket] = le->next; | |
294 | res->number_entries--; | |
295 | /* Defer freeing this entry. */ | |
296 | res->spare = le; | |
297 | return (le); | |
a44e961d PA |
298 | } |
299 | } | |
6ed811d1 PA |
300 | return (NULL); |
301 | } | |
302 | ||
8bf2d2ac PA |
303 | static struct links_entry * |
304 | next_entry(struct archive_entry_linkresolver *res) | |
305 | { | |
306 | struct links_entry *le; | |
307 | size_t bucket; | |
308 | ||
309 | /* Free a held entry. */ | |
310 | if (res->spare != NULL) { | |
311 | archive_entry_free(res->spare->canonical); | |
312 | free(res->spare); | |
313 | res->spare = NULL; | |
314 | } | |
315 | ||
316 | /* If the links cache overflowed and got flushed, don't bother. */ | |
317 | if (res->buckets == NULL) | |
318 | return (NULL); | |
319 | ||
320 | /* Look for next non-empty bucket in the links cache. */ | |
321 | for (bucket = 0; bucket < res->number_buckets; bucket++) { | |
322 | le = res->buckets[bucket]; | |
323 | if (le != NULL) { | |
324 | /* Remove it from this hash bucket. */ | |
325 | if (le->next != NULL) | |
326 | le->next->previous = le->previous; | |
327 | res->buckets[bucket] = le->next; | |
328 | res->number_entries--; | |
329 | /* Defer freeing this entry. */ | |
330 | res->spare = le; | |
331 | return (le); | |
332 | } | |
333 | } | |
334 | return (NULL); | |
335 | } | |
336 | ||
337 | static struct links_entry * | |
6ed811d1 PA |
338 | insert_entry(struct archive_entry_linkresolver *res, |
339 | struct archive_entry *entry) | |
340 | { | |
341 | struct links_entry *le; | |
342 | int hash, bucket; | |
a44e961d PA |
343 | |
344 | /* Add this entry to the links cache. */ | |
345 | le = malloc(sizeof(struct links_entry)); | |
346 | if (le == NULL) | |
8bf2d2ac PA |
347 | return (NULL); |
348 | memset(le, 0, sizeof(*le)); | |
349 | le->canonical = archive_entry_clone(entry); | |
a44e961d | 350 | |
6ed811d1 PA |
351 | /* If the links cache is getting too full, enlarge the hash table. */ |
352 | if (res->number_entries > res->number_buckets * 2) | |
353 | grow_hash(res); | |
354 | ||
9c82a63e | 355 | hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry); |
6ed811d1 PA |
356 | bucket = hash % res->number_buckets; |
357 | ||
a44e961d | 358 | /* If we could allocate the entry, record it. */ |
6ed811d1 PA |
359 | if (res->buckets[bucket] != NULL) |
360 | res->buckets[bucket]->previous = le; | |
361 | res->number_entries++; | |
362 | le->next = res->buckets[bucket]; | |
a44e961d | 363 | le->previous = NULL; |
6ed811d1 PA |
364 | res->buckets[bucket] = le; |
365 | le->hash = hash; | |
366 | le->links = archive_entry_nlink(entry) - 1; | |
8bf2d2ac | 367 | return (le); |
6ed811d1 PA |
368 | } |
369 | ||
370 | static void | |
371 | grow_hash(struct archive_entry_linkresolver *res) | |
372 | { | |
373 | struct links_entry *le, **new_buckets; | |
374 | size_t new_size; | |
375 | size_t i, bucket; | |
376 | ||
377 | /* Try to enlarge the bucket list. */ | |
378 | new_size = res->number_buckets * 2; | |
379 | new_buckets = malloc(new_size * sizeof(struct links_entry *)); | |
380 | ||
381 | if (new_buckets != NULL) { | |
382 | memset(new_buckets, 0, | |
383 | new_size * sizeof(struct links_entry *)); | |
384 | for (i = 0; i < res->number_buckets; i++) { | |
385 | while (res->buckets[i] != NULL) { | |
386 | /* Remove entry from old bucket. */ | |
387 | le = res->buckets[i]; | |
388 | res->buckets[i] = le->next; | |
389 | ||
390 | /* Add entry to new bucket. */ | |
391 | bucket = le->hash % new_size; | |
392 | ||
393 | if (new_buckets[bucket] != NULL) | |
394 | new_buckets[bucket]->previous = | |
395 | le; | |
396 | le->next = new_buckets[bucket]; | |
397 | le->previous = NULL; | |
398 | new_buckets[bucket] = le; | |
399 | } | |
400 | } | |
401 | free(res->buckets); | |
402 | res->buckets = new_buckets; | |
403 | res->number_buckets = new_size; | |
404 | } | |
a44e961d | 405 | } |