Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /*- |
2 | * Copyright (c) 1992 Keith Muller. | |
3 | * Copyright (c) 1992, 1993 | |
4 | * The Regents of the University of California. All rights reserved. | |
5 | * | |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * Keith Muller of the University of California, San Diego. | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | |
dc71b7ab | 17 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
18 | * may be used to endorse or promote products derived from this software |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
1de703da MD |
32 | * |
33 | * @(#)tables.c 8.1 (Berkeley) 5/31/93 | |
34 | * $FreeBSD: src/bin/pax/tables.c,v 1.13.2.1 2001/08/01 05:03:12 obrien Exp $ | |
984263bc MD |
35 | */ |
36 | ||
984263bc MD |
37 | #include <sys/types.h> |
38 | #include <sys/time.h> | |
39 | #include <sys/stat.h> | |
40 | #include <sys/fcntl.h> | |
41 | #include <errno.h> | |
42 | #include <stdio.h> | |
43 | #include <stdlib.h> | |
44 | #include <string.h> | |
45 | #include <unistd.h> | |
46 | #include "pax.h" | |
47 | #include "tables.h" | |
48 | #include "extern.h" | |
49 | ||
50 | /* | |
51 | * Routines for controlling the contents of all the different databases pax | |
52 | * keeps. Tables are dynamically created only when they are needed. The | |
53 | * goal was speed and the ability to work with HUGE archives. The databases | |
54 | * were kept simple, but do have complex rules for when the contents change. | |
55 | * As of this writing, the POSIX library functions were more complex than | |
56 | * needed for this application (pax databases have very short lifetimes and | |
57 | * do not survive after pax is finished). Pax is required to handle very | |
58 | * large archives. These database routines carefully combine memory usage and | |
59 | * temporary file storage in ways which will not significantly impact runtime | |
60 | * performance while allowing the largest possible archives to be handled. | |
418e7697 | 61 | * Trying to force the fit to the POSIX database routines was not considered |
984263bc MD |
62 | * time well spent. |
63 | */ | |
64 | ||
65 | static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ | |
66 | static FTM **ftab = NULL; /* file time table for updating arch */ | |
67 | static NAMT **ntab = NULL; /* interactive rename storage table */ | |
68 | static DEVT **dtab = NULL; /* device/inode mapping tables */ | |
69 | static ATDIR **atab = NULL; /* file tree directory time reset table */ | |
70 | static int dirfd = -1; /* storage for setting created dir time/mode */ | |
71 | static u_long dircnt; /* entries in dir time/mode storage */ | |
72 | static int ffd = -1; /* tmp file for file time table name storage */ | |
73 | ||
9dbf638f | 74 | static DEVT *chk_dev (dev_t, int); |
984263bc MD |
75 | |
76 | /* | |
77 | * hard link table routines | |
78 | * | |
79 | * The hard link table tries to detect hard links to files using the device and | |
80 | * inode values. We do this when writing an archive, so we can tell the format | |
81 | * write routine that this file is a hard link to another file. The format | |
82 | * write routine then can store this file in whatever way it wants (as a hard | |
83 | * link if the format supports that like tar, or ignore this info like cpio). | |
84 | * (Actually a field in the format driver table tells us if the format wants | |
85 | * hard link info. if not, we do not waste time looking for them). We also use | |
86 | * the same table when reading an archive. In that situation, this table is | |
87 | * used by the format read routine to detect hard links from stored dev and | |
88 | * inode numbers (like cpio). This will allow pax to create a link when one | |
89 | * can be detected by the archive format. | |
90 | */ | |
91 | ||
92 | /* | |
93 | * lnk_start | |
94 | * Creates the hard link table. | |
95 | * Return: | |
96 | * 0 if created, -1 if failure | |
97 | */ | |
98 | ||
984263bc MD |
99 | int |
100 | lnk_start(void) | |
984263bc MD |
101 | { |
102 | if (ltab != NULL) | |
103 | return(0); | |
6e278935 | 104 | if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { |
984263bc MD |
105 | paxwarn(1, "Cannot allocate memory for hard link table"); |
106 | return(-1); | |
107 | } | |
108 | return(0); | |
109 | } | |
110 | ||
111 | /* | |
112 | * chk_lnk() | |
113 | * Looks up entry in hard link hash table. If found, it copies the name | |
114 | * of the file it is linked to (we already saw that file) into ln_name. | |
115 | * lnkcnt is decremented and if goes to 1 the node is deleted from the | |
116 | * database. (We have seen all the links to this file). If not found, | |
117 | * we add the file to the database if it has the potential for having | |
118 | * hard links to other files we may process (it has a link count > 1) | |
119 | * Return: | |
120 | * if found returns 1; if not found returns 0; -1 on error | |
121 | */ | |
122 | ||
984263bc | 123 | int |
86a586bb | 124 | chk_lnk(ARCHD *arcn) |
984263bc | 125 | { |
86a586bb LF |
126 | HRDLNK *pt; |
127 | HRDLNK **ppt; | |
128 | u_int indx; | |
984263bc MD |
129 | |
130 | if (ltab == NULL) | |
131 | return(-1); | |
132 | /* | |
133 | * ignore those nodes that cannot have hard links | |
134 | */ | |
135 | if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) | |
136 | return(0); | |
137 | ||
138 | /* | |
139 | * hash inode number and look for this file | |
140 | */ | |
141 | indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; | |
142 | if ((pt = ltab[indx]) != NULL) { | |
143 | /* | |
144 | * it's hash chain in not empty, walk down looking for it | |
145 | */ | |
146 | ppt = &(ltab[indx]); | |
147 | while (pt != NULL) { | |
148 | if ((pt->ino == arcn->sb.st_ino) && | |
149 | (pt->dev == arcn->sb.st_dev)) | |
150 | break; | |
151 | ppt = &(pt->fow); | |
152 | pt = pt->fow; | |
153 | } | |
154 | ||
155 | if (pt != NULL) { | |
156 | /* | |
157 | * found a link. set the node type and copy in the | |
158 | * name of the file it is to link to. we need to | |
159 | * handle hardlinks to regular files differently than | |
160 | * other links. | |
161 | */ | |
162 | arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name, | |
163 | sizeof(arcn->ln_name) - 1); | |
164 | arcn->ln_name[arcn->ln_nlen] = '\0'; | |
165 | if (arcn->type == PAX_REG) | |
166 | arcn->type = PAX_HRG; | |
167 | else | |
168 | arcn->type = PAX_HLK; | |
169 | ||
170 | /* | |
171 | * if we have found all the links to this file, remove | |
172 | * it from the database | |
173 | */ | |
174 | if (--pt->nlink <= 1) { | |
175 | *ppt = pt->fow; | |
57fed2af EN |
176 | free((char *)pt->name); |
177 | free((char *)pt); | |
984263bc MD |
178 | } |
179 | return(1); | |
180 | } | |
181 | } | |
182 | ||
183 | /* | |
184 | * we never saw this file before. It has links so we add it to the | |
185 | * front of this hash chain | |
186 | */ | |
187 | if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) { | |
188 | if ((pt->name = strdup(arcn->name)) != NULL) { | |
189 | pt->dev = arcn->sb.st_dev; | |
190 | pt->ino = arcn->sb.st_ino; | |
191 | pt->nlink = arcn->sb.st_nlink; | |
192 | pt->fow = ltab[indx]; | |
193 | ltab[indx] = pt; | |
194 | return(0); | |
195 | } | |
57fed2af | 196 | free((char *)pt); |
984263bc MD |
197 | } |
198 | ||
199 | paxwarn(1, "Hard link table out of memory"); | |
200 | return(-1); | |
201 | } | |
202 | ||
203 | /* | |
204 | * purg_lnk | |
205 | * remove reference for a file that we may have added to the data base as | |
206 | * a potential source for hard links. We ended up not using the file, so | |
207 | * we do not want to accidently point another file at it later on. | |
208 | */ | |
209 | ||
984263bc | 210 | void |
86a586bb | 211 | purg_lnk(ARCHD *arcn) |
984263bc | 212 | { |
86a586bb LF |
213 | HRDLNK *pt; |
214 | HRDLNK **ppt; | |
215 | u_int indx; | |
984263bc MD |
216 | |
217 | if (ltab == NULL) | |
218 | return; | |
219 | /* | |
220 | * do not bother to look if it could not be in the database | |
221 | */ | |
222 | if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || | |
223 | (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) | |
224 | return; | |
225 | ||
226 | /* | |
227 | * find the hash chain for this inode value, if empty return | |
228 | */ | |
229 | indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; | |
230 | if ((pt = ltab[indx]) == NULL) | |
231 | return; | |
232 | ||
233 | /* | |
234 | * walk down the list looking for the inode/dev pair, unlink and | |
235 | * free if found | |
236 | */ | |
237 | ppt = &(ltab[indx]); | |
238 | while (pt != NULL) { | |
239 | if ((pt->ino == arcn->sb.st_ino) && | |
240 | (pt->dev == arcn->sb.st_dev)) | |
241 | break; | |
242 | ppt = &(pt->fow); | |
243 | pt = pt->fow; | |
244 | } | |
245 | if (pt == NULL) | |
246 | return; | |
247 | ||
248 | /* | |
249 | * remove and free it | |
250 | */ | |
251 | *ppt = pt->fow; | |
57fed2af EN |
252 | free((char *)pt->name); |
253 | free((char *)pt); | |
984263bc MD |
254 | } |
255 | ||
256 | /* | |
257 | * lnk_end() | |
258 | * pull apart a existing link table so we can reuse it. We do this between | |
259 | * read and write phases of append with update. (The format may have | |
260 | * used the link table, and we need to start with a fresh table for the | |
261 | * write phase | |
262 | */ | |
263 | ||
984263bc MD |
264 | void |
265 | lnk_end(void) | |
984263bc | 266 | { |
86a586bb LF |
267 | int i; |
268 | HRDLNK *pt; | |
269 | HRDLNK *ppt; | |
984263bc MD |
270 | |
271 | if (ltab == NULL) | |
272 | return; | |
273 | ||
274 | for (i = 0; i < L_TAB_SZ; ++i) { | |
275 | if (ltab[i] == NULL) | |
276 | continue; | |
277 | pt = ltab[i]; | |
278 | ltab[i] = NULL; | |
279 | ||
280 | /* | |
281 | * free up each entry on this chain | |
282 | */ | |
283 | while (pt != NULL) { | |
284 | ppt = pt; | |
285 | pt = ppt->fow; | |
57fed2af EN |
286 | free((char *)ppt->name); |
287 | free((char *)ppt); | |
984263bc MD |
288 | } |
289 | } | |
290 | return; | |
291 | } | |
292 | ||
293 | /* | |
294 | * modification time table routines | |
295 | * | |
296 | * The modification time table keeps track of last modification times for all | |
297 | * files stored in an archive during a write phase when -u is set. We only | |
298 | * add a file to the archive if it is newer than a file with the same name | |
299 | * already stored on the archive (if there is no other file with the same | |
300 | * name on the archive it is added). This applies to writes and appends. | |
301 | * An append with an -u must read the archive and store the modification time | |
302 | * for every file on that archive before starting the write phase. It is clear | |
303 | * that this is one HUGE database. To save memory space, the actual file names | |
418e7697 | 304 | * are stored in a scratch file and indexed by an in memory hash table. The |
984263bc MD |
305 | * hash table is indexed by hashing the file path. The nodes in the table store |
306 | * the length of the filename and the lseek offset within the scratch file | |
418e7697 PA |
307 | * where the actual name is stored. Since there are never any deletions from |
308 | * this table, fragmentation of the scratch file is never a issue. Lookups | |
309 | * seem to not exhibit any locality at all (files in the database are rarely | |
310 | * looked up more than once...), so caching is just a waste of memory. The | |
311 | * only limitation is the amount of scratch file space available to store the | |
984263bc MD |
312 | * path names. |
313 | */ | |
314 | ||
315 | /* | |
316 | * ftime_start() | |
317 | * create the file time hash table and open for read/write the scratch | |
318 | * file. (after created it is unlinked, so when we exit we leave | |
319 | * no witnesses). | |
320 | * Return: | |
321 | * 0 if the table and file was created ok, -1 otherwise | |
322 | */ | |
323 | ||
984263bc MD |
324 | int |
325 | ftime_start(void) | |
984263bc MD |
326 | { |
327 | ||
328 | if (ftab != NULL) | |
329 | return(0); | |
6e278935 | 330 | if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { |
984263bc MD |
331 | paxwarn(1, "Cannot allocate memory for file time table"); |
332 | return(-1); | |
333 | } | |
334 | ||
335 | /* | |
336 | * get random name and create temporary scratch file, unlink name | |
337 | * so it will get removed on exit | |
338 | */ | |
339 | memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); | |
340 | if ((ffd = mkstemp(tempfile)) < 0) { | |
341 | syswarn(1, errno, "Unable to create temporary file: %s", | |
342 | tempfile); | |
343 | return(-1); | |
344 | } | |
57fed2af | 345 | unlink(tempfile); |
984263bc MD |
346 | |
347 | return(0); | |
348 | } | |
349 | ||
350 | /* | |
351 | * chk_ftime() | |
352 | * looks up entry in file time hash table. If not found, the file is | |
353 | * added to the hash table and the file named stored in the scratch file. | |
354 | * If a file with the same name is found, the file times are compared and | |
355 | * the most recent file time is retained. If the new file was younger (or | |
356 | * was not in the database) the new file is selected for storage. | |
357 | * Return: | |
358 | * 0 if file should be added to the archive, 1 if it should be skipped, | |
359 | * -1 on error | |
360 | */ | |
361 | ||
984263bc | 362 | int |
86a586bb | 363 | chk_ftime(ARCHD *arcn) |
984263bc | 364 | { |
86a586bb LF |
365 | FTM *pt; |
366 | int namelen; | |
367 | u_int indx; | |
984263bc MD |
368 | char ckname[PAXPATHLEN+1]; |
369 | ||
370 | /* | |
371 | * no info, go ahead and add to archive | |
372 | */ | |
373 | if (ftab == NULL) | |
374 | return(0); | |
375 | ||
376 | /* | |
377 | * hash the pathname and look up in table | |
378 | */ | |
379 | namelen = arcn->nlen; | |
380 | indx = st_hash(arcn->name, namelen, F_TAB_SZ); | |
381 | if ((pt = ftab[indx]) != NULL) { | |
382 | /* | |
383 | * the hash chain is not empty, walk down looking for match | |
384 | * only read up the path names if the lengths match, speeds | |
385 | * up the search a lot | |
386 | */ | |
387 | while (pt != NULL) { | |
388 | if (pt->namelen == namelen) { | |
389 | /* | |
390 | * potential match, have to read the name | |
391 | * from the scratch file. | |
392 | */ | |
393 | if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { | |
394 | syswarn(1, errno, | |
395 | "Failed ftime table seek"); | |
396 | return(-1); | |
397 | } | |
398 | if (read(ffd, ckname, namelen) != namelen) { | |
399 | syswarn(1, errno, | |
400 | "Failed ftime table read"); | |
401 | return(-1); | |
402 | } | |
403 | ||
404 | /* | |
405 | * if the names match, we are done | |
406 | */ | |
407 | if (!strncmp(ckname, arcn->name, namelen)) | |
408 | break; | |
409 | } | |
410 | ||
411 | /* | |
412 | * try the next entry on the chain | |
413 | */ | |
414 | pt = pt->fow; | |
415 | } | |
416 | ||
417 | if (pt != NULL) { | |
418 | /* | |
419 | * found the file, compare the times, save the newer | |
420 | */ | |
421 | if (arcn->sb.st_mtime > pt->mtime) { | |
422 | /* | |
423 | * file is newer | |
424 | */ | |
425 | pt->mtime = arcn->sb.st_mtime; | |
426 | return(0); | |
427 | } | |
428 | /* | |
429 | * file is older | |
430 | */ | |
431 | return(1); | |
432 | } | |
433 | } | |
434 | ||
435 | /* | |
436 | * not in table, add it | |
437 | */ | |
438 | if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) { | |
439 | /* | |
440 | * add the name at the end of the scratch file, saving the | |
441 | * offset. add the file to the head of the hash chain | |
442 | */ | |
443 | if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) { | |
444 | if (write(ffd, arcn->name, namelen) == namelen) { | |
445 | pt->mtime = arcn->sb.st_mtime; | |
446 | pt->namelen = namelen; | |
447 | pt->fow = ftab[indx]; | |
448 | ftab[indx] = pt; | |
449 | return(0); | |
450 | } | |
451 | syswarn(1, errno, "Failed write to file time table"); | |
452 | } else | |
453 | syswarn(1, errno, "Failed seek on file time table"); | |
454 | } else | |
455 | paxwarn(1, "File time table ran out of memory"); | |
456 | ||
457 | if (pt != NULL) | |
57fed2af | 458 | free((char *)pt); |
984263bc MD |
459 | return(-1); |
460 | } | |
461 | ||
462 | /* | |
463 | * Interactive rename table routines | |
464 | * | |
465 | * The interactive rename table keeps track of the new names that the user | |
466 | * assigns to files from tty input. Since this map is unique for each file | |
467 | * we must store it in case there is a reference to the file later in archive | |
468 | * (a link). Otherwise we will be unable to find the file we know was | |
469 | * extracted. The remapping of these files is stored in a memory based hash | |
470 | * table (it is assumed since input must come from /dev/tty, it is unlikely to | |
471 | * be a very large table). | |
472 | */ | |
473 | ||
474 | /* | |
475 | * name_start() | |
476 | * create the interactive rename table | |
477 | * Return: | |
478 | * 0 if successful, -1 otherwise | |
479 | */ | |
480 | ||
984263bc MD |
481 | int |
482 | name_start(void) | |
984263bc MD |
483 | { |
484 | if (ntab != NULL) | |
485 | return(0); | |
6e278935 | 486 | if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { |
984263bc MD |
487 | paxwarn(1, "Cannot allocate memory for interactive rename table"); |
488 | return(-1); | |
489 | } | |
490 | return(0); | |
491 | } | |
492 | ||
493 | /* | |
494 | * add_name() | |
495 | * add the new name to old name mapping just created by the user. | |
496 | * If an old name mapping is found (there may be duplicate names on an | |
497 | * archive) only the most recent is kept. | |
498 | * Return: | |
499 | * 0 if added, -1 otherwise | |
500 | */ | |
501 | ||
984263bc | 502 | int |
86a586bb | 503 | add_name(char *oname, int onamelen, char *nname) |
984263bc | 504 | { |
86a586bb LF |
505 | NAMT *pt; |
506 | u_int indx; | |
984263bc MD |
507 | |
508 | if (ntab == NULL) { | |
509 | /* | |
510 | * should never happen | |
511 | */ | |
512 | paxwarn(0, "No interactive rename table, links may fail\n"); | |
513 | return(0); | |
514 | } | |
515 | ||
516 | /* | |
517 | * look to see if we have already mapped this file, if so we | |
518 | * will update it | |
519 | */ | |
520 | indx = st_hash(oname, onamelen, N_TAB_SZ); | |
521 | if ((pt = ntab[indx]) != NULL) { | |
522 | /* | |
523 | * look down the has chain for the file | |
524 | */ | |
525 | while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) | |
526 | pt = pt->fow; | |
527 | ||
528 | if (pt != NULL) { | |
529 | /* | |
530 | * found an old mapping, replace it with the new one | |
531 | * the user just input (if it is different) | |
532 | */ | |
533 | if (strcmp(nname, pt->nname) == 0) | |
534 | return(0); | |
535 | ||
57fed2af | 536 | free((char *)pt->nname); |
984263bc MD |
537 | if ((pt->nname = strdup(nname)) == NULL) { |
538 | paxwarn(1, "Cannot update rename table"); | |
539 | return(-1); | |
540 | } | |
541 | return(0); | |
542 | } | |
543 | } | |
544 | ||
545 | /* | |
546 | * this is a new mapping, add it to the table | |
547 | */ | |
548 | if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) { | |
549 | if ((pt->oname = strdup(oname)) != NULL) { | |
550 | if ((pt->nname = strdup(nname)) != NULL) { | |
551 | pt->fow = ntab[indx]; | |
552 | ntab[indx] = pt; | |
553 | return(0); | |
554 | } | |
57fed2af | 555 | free((char *)pt->oname); |
984263bc | 556 | } |
57fed2af | 557 | free((char *)pt); |
984263bc MD |
558 | } |
559 | paxwarn(1, "Interactive rename table out of memory"); | |
560 | return(-1); | |
561 | } | |
562 | ||
563 | /* | |
564 | * sub_name() | |
565 | * look up a link name to see if it points at a file that has been | |
566 | * remapped by the user. If found, the link is adjusted to contain the | |
567 | * new name (oname is the link to name) | |
568 | */ | |
569 | ||
984263bc | 570 | void |
86a586bb | 571 | sub_name(char *oname, int *onamelen, size_t onamesize) |
984263bc | 572 | { |
86a586bb LF |
573 | NAMT *pt; |
574 | u_int indx; | |
984263bc MD |
575 | |
576 | if (ntab == NULL) | |
577 | return; | |
578 | /* | |
579 | * look the name up in the hash table | |
580 | */ | |
581 | indx = st_hash(oname, *onamelen, N_TAB_SZ); | |
582 | if ((pt = ntab[indx]) == NULL) | |
583 | return; | |
584 | ||
585 | while (pt != NULL) { | |
586 | /* | |
587 | * walk down the hash chain looking for a match | |
588 | */ | |
589 | if (strcmp(oname, pt->oname) == 0) { | |
590 | /* | |
591 | * found it, replace it with the new name | |
592 | * and return (we know that oname has enough space) | |
593 | */ | |
594 | *onamelen = l_strncpy(oname, pt->nname, onamesize - 1); | |
595 | oname[*onamelen] = '\0'; | |
596 | return; | |
597 | } | |
598 | pt = pt->fow; | |
599 | } | |
600 | ||
601 | /* | |
602 | * no match, just return | |
603 | */ | |
604 | return; | |
605 | } | |
606 | ||
607 | /* | |
608 | * device/inode mapping table routines | |
609 | * (used with formats that store device and inodes fields) | |
610 | * | |
611 | * device/inode mapping tables remap the device field in a archive header. The | |
612 | * device/inode fields are used to determine when files are hard links to each | |
613 | * other. However these values have very little meaning outside of that. This | |
614 | * database is used to solve one of two different problems. | |
615 | * | |
616 | * 1) when files are appended to an archive, while the new files may have hard | |
617 | * links to each other, you cannot determine if they have hard links to any | |
618 | * file already stored on the archive from a prior run of pax. We must assume | |
619 | * that these inode/device pairs are unique only within a SINGLE run of pax | |
620 | * (which adds a set of files to an archive). So we have to make sure the | |
621 | * inode/dev pairs we add each time are always unique. We do this by observing | |
622 | * while the inode field is very dense, the use of the dev field is fairly | |
623 | * sparse. Within each run of pax, we remap any device number of a new archive | |
624 | * member that has a device number used in a prior run and already stored in a | |
625 | * file on the archive. During the read phase of the append, we store the | |
626 | * device numbers used and mark them to not be used by any file during the | |
627 | * write phase. If during write we go to use one of those old device numbers, | |
628 | * we remap it to a new value. | |
629 | * | |
630 | * 2) Often the fields in the archive header used to store these values are | |
631 | * too small to store the entire value. The result is an inode or device value | |
632 | * which can be truncated. This really can foul up an archive. With truncation | |
633 | * we end up creating links between files that are really not links (after | |
634 | * truncation the inodes are the same value). We address that by detecting | |
635 | * truncation and forcing a remap of the device field to split truncated | |
636 | * inodes away from each other. Each truncation creates a pattern of bits that | |
637 | * are removed. We use this pattern of truncated bits to partition the inodes | |
638 | * on a single device to many different devices (each one represented by the | |
639 | * truncated bit pattern). All inodes on the same device that have the same | |
640 | * truncation pattern are mapped to the same new device. Two inodes that | |
641 | * truncate to the same value clearly will always have different truncation | |
642 | * bit patterns, so they will be split from away each other. When we spot | |
643 | * device truncation we remap the device number to a non truncated value. | |
644 | * (for more info see table.h for the data structures involved). | |
645 | */ | |
646 | ||
647 | /* | |
648 | * dev_start() | |
649 | * create the device mapping table | |
650 | * Return: | |
651 | * 0 if successful, -1 otherwise | |
652 | */ | |
653 | ||
984263bc MD |
654 | int |
655 | dev_start(void) | |
984263bc MD |
656 | { |
657 | if (dtab != NULL) | |
658 | return(0); | |
6e278935 | 659 | if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { |
984263bc MD |
660 | paxwarn(1, "Cannot allocate memory for device mapping table"); |
661 | return(-1); | |
662 | } | |
663 | return(0); | |
664 | } | |
665 | ||
666 | /* | |
667 | * add_dev() | |
668 | * add a device number to the table. this will force the device to be | |
669 | * remapped to a new value if it be used during a write phase. This | |
670 | * function is called during the read phase of an append to prohibit the | |
671 | * use of any device number already in the archive. | |
672 | * Return: | |
673 | * 0 if added ok, -1 otherwise | |
674 | */ | |
675 | ||
984263bc | 676 | int |
86a586bb | 677 | add_dev(ARCHD *arcn) |
984263bc MD |
678 | { |
679 | if (chk_dev(arcn->sb.st_dev, 1) == NULL) | |
680 | return(-1); | |
681 | return(0); | |
682 | } | |
683 | ||
684 | /* | |
685 | * chk_dev() | |
686 | * check for a device value in the device table. If not found and the add | |
687 | * flag is set, it is added. This does NOT assign any mapping values, just | |
688 | * adds the device number as one that need to be remapped. If this device | |
689 | * is already mapped, just return with a pointer to that entry. | |
690 | * Return: | |
691 | * pointer to the entry for this device in the device map table. Null | |
692 | * if the add flag is not set and the device is not in the table (it is | |
693 | * not been seen yet). If add is set and the device cannot be added, null | |
694 | * is returned (indicates an error). | |
695 | */ | |
696 | ||
984263bc MD |
697 | static DEVT * |
698 | chk_dev(dev_t dev, int add) | |
984263bc | 699 | { |
86a586bb LF |
700 | DEVT *pt; |
701 | u_int indx; | |
984263bc MD |
702 | |
703 | if (dtab == NULL) | |
704 | return(NULL); | |
705 | /* | |
706 | * look to see if this device is already in the table | |
707 | */ | |
708 | indx = ((unsigned)dev) % D_TAB_SZ; | |
709 | if ((pt = dtab[indx]) != NULL) { | |
710 | while ((pt != NULL) && (pt->dev != dev)) | |
711 | pt = pt->fow; | |
712 | ||
713 | /* | |
714 | * found it, return a pointer to it | |
715 | */ | |
716 | if (pt != NULL) | |
717 | return(pt); | |
718 | } | |
719 | ||
720 | /* | |
721 | * not in table, we add it only if told to as this may just be a check | |
722 | * to see if a device number is being used. | |
723 | */ | |
724 | if (add == 0) | |
725 | return(NULL); | |
726 | ||
727 | /* | |
728 | * allocate a node for this device and add it to the front of the hash | |
729 | * chain. Note we do not assign remaps values here, so the pt->list | |
730 | * list must be NULL. | |
731 | */ | |
732 | if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) { | |
733 | paxwarn(1, "Device map table out of memory"); | |
734 | return(NULL); | |
735 | } | |
736 | pt->dev = dev; | |
737 | pt->list = NULL; | |
738 | pt->fow = dtab[indx]; | |
739 | dtab[indx] = pt; | |
740 | return(pt); | |
741 | } | |
742 | /* | |
743 | * map_dev() | |
744 | * given an inode and device storage mask (the mask has a 1 for each bit | |
745 | * the archive format is able to store in a header), we check for inode | |
746 | * and device truncation and remap the device as required. Device mapping | |
747 | * can also occur when during the read phase of append a device number was | |
748 | * seen (and was marked as do not use during the write phase). WE ASSUME | |
749 | * that unsigned longs are the same size or bigger than the fields used | |
750 | * for ino_t and dev_t. If not the types will have to be changed. | |
751 | * Return: | |
752 | * 0 if all ok, -1 otherwise. | |
753 | */ | |
754 | ||
984263bc | 755 | int |
86a586bb | 756 | map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) |
984263bc | 757 | { |
86a586bb LF |
758 | DEVT *pt; |
759 | DLIST *dpt; | |
984263bc MD |
760 | static dev_t lastdev = 0; /* next device number to try */ |
761 | int trc_ino = 0; | |
762 | int trc_dev = 0; | |
763 | ino_t trunc_bits = 0; | |
764 | ino_t nino; | |
765 | ||
766 | if (dtab == NULL) | |
767 | return(0); | |
768 | /* | |
769 | * check for device and inode truncation, and extract the truncated | |
770 | * bit pattern. | |
771 | */ | |
772 | if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) | |
773 | ++trc_dev; | |
774 | if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { | |
775 | ++trc_ino; | |
776 | trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); | |
777 | } | |
778 | ||
779 | /* | |
780 | * see if this device is already being mapped, look up the device | |
781 | * then find the truncation bit pattern which applies | |
782 | */ | |
783 | if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { | |
784 | /* | |
785 | * this device is already marked to be remapped | |
786 | */ | |
787 | for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) | |
788 | if (dpt->trunc_bits == trunc_bits) | |
789 | break; | |
790 | ||
791 | if (dpt != NULL) { | |
792 | /* | |
793 | * we are being remapped for this device and pattern | |
794 | * change the device number to be stored and return | |
795 | */ | |
796 | arcn->sb.st_dev = dpt->dev; | |
797 | arcn->sb.st_ino = nino; | |
798 | return(0); | |
799 | } | |
800 | } else { | |
801 | /* | |
802 | * this device is not being remapped YET. if we do not have any | |
803 | * form of truncation, we do not need a remap | |
804 | */ | |
805 | if (!trc_ino && !trc_dev) | |
806 | return(0); | |
807 | ||
808 | /* | |
809 | * we have truncation, have to add this as a device to remap | |
810 | */ | |
811 | if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) | |
812 | goto bad; | |
813 | ||
814 | /* | |
815 | * if we just have a truncated inode, we have to make sure that | |
816 | * all future inodes that do not truncate (they have the | |
817 | * truncation pattern of all 0's) continue to map to the same | |
818 | * device number. We probably have already written inodes with | |
819 | * this device number to the archive with the truncation | |
820 | * pattern of all 0's. So we add the mapping for all 0's to the | |
821 | * same device number. | |
822 | */ | |
823 | if (!trc_dev && (trunc_bits != 0)) { | |
824 | if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL) | |
825 | goto bad; | |
826 | dpt->trunc_bits = 0; | |
827 | dpt->dev = arcn->sb.st_dev; | |
828 | dpt->fow = pt->list; | |
829 | pt->list = dpt; | |
830 | } | |
831 | } | |
832 | ||
833 | /* | |
834 | * look for a device number not being used. We must watch for wrap | |
835 | * around on lastdev (so we do not get stuck looking forever!) | |
836 | */ | |
837 | while (++lastdev > 0) { | |
838 | if (chk_dev(lastdev, 0) != NULL) | |
839 | continue; | |
840 | /* | |
841 | * found an unused value. If we have reached truncation point | |
842 | * for this format we are hosed, so we give up. Otherwise we | |
843 | * mark it as being used. | |
844 | */ | |
845 | if (((lastdev & ((dev_t)dev_mask)) != lastdev) || | |
846 | (chk_dev(lastdev, 1) == NULL)) | |
847 | goto bad; | |
848 | break; | |
849 | } | |
850 | ||
851 | if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)) | |
852 | goto bad; | |
853 | ||
854 | /* | |
855 | * got a new device number, store it under this truncation pattern. | |
856 | * change the device number this file is being stored with. | |
857 | */ | |
858 | dpt->trunc_bits = trunc_bits; | |
859 | dpt->dev = lastdev; | |
860 | dpt->fow = pt->list; | |
861 | pt->list = dpt; | |
862 | arcn->sb.st_dev = lastdev; | |
863 | arcn->sb.st_ino = nino; | |
864 | return(0); | |
865 | ||
866 | bad: | |
867 | paxwarn(1, "Unable to fix truncated inode/device field when storing %s", | |
868 | arcn->name); | |
869 | paxwarn(0, "Archive may create improper hard links when extracted"); | |
870 | return(0); | |
871 | } | |
872 | ||
873 | /* | |
874 | * directory access/mod time reset table routines (for directories READ by pax) | |
875 | * | |
418e7697 | 876 | * The pax -t flag requires that access times of archive files be the same |
984263bc MD |
877 | * before being read by pax. For regular files, access time is restored after |
878 | * the file has been copied. This database provides the same functionality for | |
879 | * directories read during file tree traversal. Restoring directory access time | |
880 | * is more complex than files since directories may be read several times until | |
881 | * all the descendants in their subtree are visited by fts. Directory access | |
882 | * and modification times are stored during the fts pre-order visit (done | |
418e7697 | 883 | * before any descendants in the subtree are visited) and restored after the |
984263bc MD |
884 | * fts post-order visit (after all the descendants have been visited). In the |
885 | * case of premature exit from a subtree (like from the effects of -n), any | |
886 | * directory entries left in this database are reset during final cleanup | |
887 | * operations of pax. Entries are hashed by inode number for fast lookup. | |
888 | */ | |
889 | ||
890 | /* | |
891 | * atdir_start() | |
892 | * create the directory access time database for directories READ by pax. | |
893 | * Return: | |
894 | * 0 is created ok, -1 otherwise. | |
895 | */ | |
896 | ||
984263bc MD |
897 | int |
898 | atdir_start(void) | |
984263bc MD |
899 | { |
900 | if (atab != NULL) | |
901 | return(0); | |
6e278935 | 902 | if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { |
984263bc MD |
903 | paxwarn(1,"Cannot allocate space for directory access time table"); |
904 | return(-1); | |
905 | } | |
906 | return(0); | |
907 | } | |
908 | ||
909 | ||
910 | /* | |
911 | * atdir_end() | |
912 | * walk through the directory access time table and reset the access time | |
913 | * of any directory who still has an entry left in the database. These | |
914 | * entries are for directories READ by pax | |
915 | */ | |
916 | ||
984263bc MD |
917 | void |
918 | atdir_end(void) | |
984263bc | 919 | { |
86a586bb LF |
920 | ATDIR *pt; |
921 | int i; | |
984263bc MD |
922 | |
923 | if (atab == NULL) | |
924 | return; | |
925 | /* | |
926 | * for each non-empty hash table entry reset all the directories | |
927 | * chained there. | |
928 | */ | |
929 | for (i = 0; i < A_TAB_SZ; ++i) { | |
930 | if ((pt = atab[i]) == NULL) | |
931 | continue; | |
932 | /* | |
933 | * remember to force the times, set_ftime() looks at pmtime | |
934 | * and patime, which only applies to things CREATED by pax, | |
935 | * not read by pax. Read time reset is controlled by -t. | |
936 | */ | |
937 | for (; pt != NULL; pt = pt->fow) | |
938 | set_ftime(pt->name, pt->mtime, pt->atime, 1); | |
939 | } | |
940 | } | |
941 | ||
942 | /* | |
943 | * add_atdir() | |
944 | * add a directory to the directory access time table. Table is hashed | |
945 | * and chained by inode number. This is for directories READ by pax | |
946 | */ | |
947 | ||
984263bc MD |
948 | void |
949 | add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime) | |
984263bc | 950 | { |
86a586bb LF |
951 | ATDIR *pt; |
952 | u_int indx; | |
984263bc MD |
953 | |
954 | if (atab == NULL) | |
955 | return; | |
956 | ||
957 | /* | |
958 | * make sure this directory is not already in the table, if so just | |
959 | * return (the older entry always has the correct time). The only | |
960 | * way this will happen is when the same subtree can be traversed by | |
961 | * different args to pax and the -n option is aborting fts out of a | |
418e7697 | 962 | * subtree before all the post-order visits have been made. |
984263bc MD |
963 | */ |
964 | indx = ((unsigned)ino) % A_TAB_SZ; | |
965 | if ((pt = atab[indx]) != NULL) { | |
966 | while (pt != NULL) { | |
967 | if ((pt->ino == ino) && (pt->dev == dev)) | |
968 | break; | |
969 | pt = pt->fow; | |
970 | } | |
971 | ||
972 | /* | |
973 | * oops, already there. Leave it alone. | |
974 | */ | |
975 | if (pt != NULL) | |
976 | return; | |
977 | } | |
978 | ||
979 | /* | |
980 | * add it to the front of the hash chain | |
981 | */ | |
982 | if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) { | |
983 | if ((pt->name = strdup(fname)) != NULL) { | |
984 | pt->dev = dev; | |
985 | pt->ino = ino; | |
986 | pt->mtime = mtime; | |
987 | pt->atime = atime; | |
988 | pt->fow = atab[indx]; | |
989 | atab[indx] = pt; | |
990 | return; | |
991 | } | |
57fed2af | 992 | free((char *)pt); |
984263bc MD |
993 | } |
994 | ||
995 | paxwarn(1, "Directory access time reset table ran out of memory"); | |
996 | return; | |
997 | } | |
998 | ||
999 | /* | |
1000 | * get_atdir() | |
1001 | * look up a directory by inode and device number to obtain the access | |
1002 | * and modification time you want to set to. If found, the modification | |
1003 | * and access time parameters are set and the entry is removed from the | |
1004 | * table (as it is no longer needed). These are for directories READ by | |
1005 | * pax | |
1006 | * Return: | |
1007 | * 0 if found, -1 if not found. | |
1008 | */ | |
1009 | ||
984263bc MD |
1010 | int |
1011 | get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime) | |
984263bc | 1012 | { |
86a586bb LF |
1013 | ATDIR *pt; |
1014 | ATDIR **ppt; | |
1015 | u_int indx; | |
984263bc MD |
1016 | |
1017 | if (atab == NULL) | |
1018 | return(-1); | |
1019 | /* | |
1020 | * hash by inode and search the chain for an inode and device match | |
1021 | */ | |
1022 | indx = ((unsigned)ino) % A_TAB_SZ; | |
1023 | if ((pt = atab[indx]) == NULL) | |
1024 | return(-1); | |
1025 | ||
1026 | ppt = &(atab[indx]); | |
1027 | while (pt != NULL) { | |
1028 | if ((pt->ino == ino) && (pt->dev == dev)) | |
1029 | break; | |
1030 | /* | |
1031 | * no match, go to next one | |
1032 | */ | |
1033 | ppt = &(pt->fow); | |
1034 | pt = pt->fow; | |
1035 | } | |
1036 | ||
1037 | /* | |
1038 | * return if we did not find it. | |
1039 | */ | |
1040 | if (pt == NULL) | |
1041 | return(-1); | |
1042 | ||
1043 | /* | |
1044 | * found it. return the times and remove the entry from the table. | |
1045 | */ | |
1046 | *ppt = pt->fow; | |
1047 | *mtime = pt->mtime; | |
1048 | *atime = pt->atime; | |
57fed2af EN |
1049 | free((char *)pt->name); |
1050 | free((char *)pt); | |
984263bc MD |
1051 | return(0); |
1052 | } | |
1053 | ||
1054 | /* | |
1055 | * directory access mode and time storage routines (for directories CREATED | |
1056 | * by pax). | |
1057 | * | |
1058 | * Pax requires that extracted directories, by default, have their access/mod | |
1059 | * times and permissions set to the values specified in the archive. During the | |
1060 | * actions of extracting (and creating the destination subtree during -rw copy) | |
1061 | * directories extracted may be modified after being created. Even worse is | |
1062 | * that these directories may have been created with file permissions which | |
1063 | * prohibits any descendants of these directories from being extracted. When | |
1064 | * directories are created by pax, access rights may be added to permit the | |
1065 | * creation of files in their subtree. Every time pax creates a directory, the | |
1066 | * times and file permissions specified by the archive are stored. After all | |
1067 | * files have been extracted (or copied), these directories have their times | |
1068 | * and file modes reset to the stored values. The directory info is restored in | |
1069 | * reverse order as entries were added to the data file from root to leaf. To | |
1070 | * restore atime properly, we must go backwards. The data file consists of | |
1071 | * records with two parts, the file name followed by a DIRDATA trailer. The | |
1072 | * fixed sized trailer contains the size of the name plus the off_t location in | |
1073 | * the file. To restore we work backwards through the file reading the trailer | |
1074 | * then the file name. | |
1075 | */ | |
1076 | ||
1077 | /* | |
1078 | * dir_start() | |
1079 | * set up the directory time and file mode storage for directories CREATED | |
1080 | * by pax. | |
1081 | * Return: | |
1082 | * 0 if ok, -1 otherwise | |
1083 | */ | |
1084 | ||
984263bc MD |
1085 | int |
1086 | dir_start(void) | |
984263bc MD |
1087 | { |
1088 | ||
1089 | if (dirfd != -1) | |
1090 | return(0); | |
1091 | ||
1092 | /* | |
1093 | * unlink the file so it goes away at termination by itself | |
1094 | */ | |
1095 | memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); | |
1096 | if ((dirfd = mkstemp(tempfile)) >= 0) { | |
57fed2af | 1097 | unlink(tempfile); |
984263bc MD |
1098 | return(0); |
1099 | } | |
1100 | paxwarn(1, "Unable to create temporary file for directory times: %s", | |
1101 | tempfile); | |
1102 | return(-1); | |
1103 | } | |
1104 | ||
1105 | /* | |
1106 | * add_dir() | |
1107 | * add the mode and times for a newly CREATED directory | |
1108 | * name is name of the directory, psb the stat buffer with the data in it, | |
1109 | * frc_mode is a flag that says whether to force the setting of the mode | |
1110 | * (ignoring the user set values for preserving file mode). Frc_mode is | |
1111 | * for the case where we created a file and found that the resulting | |
1112 | * directory was not writeable and the user asked for file modes to NOT | |
1113 | * be preserved. (we have to preserve what was created by default, so we | |
1114 | * have to force the setting at the end. this is stated explicitly in the | |
1115 | * pax spec) | |
1116 | */ | |
1117 | ||
984263bc MD |
1118 | void |
1119 | add_dir(char *name, int nlen, struct stat *psb, int frc_mode) | |
984263bc MD |
1120 | { |
1121 | DIRDATA dblk; | |
1122 | ||
1123 | if (dirfd < 0) | |
1124 | return; | |
1125 | ||
1126 | /* | |
1127 | * get current position (where file name will start) so we can store it | |
1128 | * in the trailer | |
1129 | */ | |
1130 | if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) { | |
1131 | paxwarn(1,"Unable to store mode and times for directory: %s",name); | |
1132 | return; | |
1133 | } | |
1134 | ||
1135 | /* | |
1136 | * write the file name followed by the trailer | |
1137 | */ | |
1138 | dblk.nlen = nlen + 1; | |
1139 | dblk.mode = psb->st_mode & 0xffff; | |
1140 | dblk.mtime = psb->st_mtime; | |
1141 | dblk.atime = psb->st_atime; | |
1142 | dblk.frc_mode = frc_mode; | |
1143 | if ((write(dirfd, name, dblk.nlen) == dblk.nlen) && | |
1144 | (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) { | |
1145 | ++dircnt; | |
1146 | return; | |
1147 | } | |
1148 | ||
1149 | paxwarn(1,"Unable to store mode and times for created directory: %s",name); | |
1150 | return; | |
1151 | } | |
1152 | ||
1153 | /* | |
1154 | * proc_dir() | |
1155 | * process all file modes and times stored for directories CREATED | |
1156 | * by pax | |
1157 | */ | |
1158 | ||
984263bc MD |
1159 | void |
1160 | proc_dir(void) | |
984263bc MD |
1161 | { |
1162 | char name[PAXPATHLEN+1]; | |
1163 | DIRDATA dblk; | |
1164 | u_long cnt; | |
1165 | ||
1166 | if (dirfd < 0) | |
1167 | return; | |
1168 | /* | |
1169 | * read backwards through the file and process each directory | |
1170 | */ | |
1171 | for (cnt = 0; cnt < dircnt; ++cnt) { | |
1172 | /* | |
1173 | * read the trailer, then the file name, if this fails | |
1174 | * just give up. | |
1175 | */ | |
1176 | if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0) | |
1177 | break; | |
1178 | if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk)) | |
1179 | break; | |
1180 | if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) | |
1181 | break; | |
1182 | if (read(dirfd, name, dblk.nlen) != dblk.nlen) | |
1183 | break; | |
1184 | if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) | |
1185 | break; | |
1186 | ||
1187 | /* | |
1188 | * frc_mode set, make sure we set the file modes even if | |
1189 | * the user didn't ask for it (see file_subs.c for more info) | |
1190 | */ | |
1191 | if (pmode || dblk.frc_mode) | |
1192 | set_pmode(name, dblk.mode); | |
1193 | if (patime || pmtime) | |
1194 | set_ftime(name, dblk.mtime, dblk.atime, 0); | |
1195 | } | |
1196 | ||
57fed2af | 1197 | close(dirfd); |
984263bc MD |
1198 | dirfd = -1; |
1199 | if (cnt != dircnt) | |
1200 | paxwarn(1,"Unable to set mode and times for created directories"); | |
1201 | return; | |
1202 | } | |
1203 | ||
1204 | /* | |
1205 | * database independent routines | |
1206 | */ | |
1207 | ||
1208 | /* | |
1209 | * st_hash() | |
1210 | * hashes filenames to a u_int for hashing into a table. Looks at the tail | |
1211 | * end of file, as this provides far better distribution than any other | |
1212 | * part of the name. For performance reasons we only care about the last | |
1213 | * MAXKEYLEN chars (should be at LEAST large enough to pick off the file | |
1214 | * name). Was tested on 500,000 name file tree traversal from the root | |
1215 | * and gave almost a perfectly uniform distribution of keys when used with | |
1216 | * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) | |
1217 | * chars at a time and pads with 0 for last addition. | |
1218 | * Return: | |
1219 | * the hash value of the string MOD (%) the table size. | |
1220 | */ | |
1221 | ||
984263bc MD |
1222 | u_int |
1223 | st_hash(char *name, int len, int tabsz) | |
984263bc | 1224 | { |
86a586bb LF |
1225 | char *pt; |
1226 | char *dest; | |
1227 | char *end; | |
1228 | int i; | |
1229 | u_int key = 0; | |
1230 | int steps; | |
1231 | int res; | |
984263bc MD |
1232 | u_int val; |
1233 | ||
1234 | /* | |
1235 | * only look at the tail up to MAXKEYLEN, we do not need to waste | |
1236 | * time here (remember these are pathnames, the tail is what will | |
1237 | * spread out the keys) | |
1238 | */ | |
1239 | if (len > MAXKEYLEN) { | |
1240 | pt = &(name[len - MAXKEYLEN]); | |
1241 | len = MAXKEYLEN; | |
1242 | } else | |
1243 | pt = name; | |
1244 | ||
1245 | /* | |
1246 | * calculate the number of u_int size steps in the string and if | |
1247 | * there is a runt to deal with | |
1248 | */ | |
1249 | steps = len/sizeof(u_int); | |
1250 | res = len % sizeof(u_int); | |
1251 | ||
1252 | /* | |
1253 | * add up the value of the string in unsigned integer sized pieces | |
1254 | * too bad we cannot have unsigned int aligned strings, then we | |
1255 | * could avoid the expensive copy. | |
1256 | */ | |
1257 | for (i = 0; i < steps; ++i) { | |
1258 | end = pt + sizeof(u_int); | |
1259 | dest = (char *)&val; | |
1260 | while (pt < end) | |
1261 | *dest++ = *pt++; | |
1262 | key += val; | |
1263 | } | |
1264 | ||
1265 | /* | |
1266 | * add in the runt padded with zero to the right | |
1267 | */ | |
1268 | if (res) { | |
1269 | val = 0; | |
1270 | end = pt + res; | |
1271 | dest = (char *)&val; | |
1272 | while (pt < end) | |
1273 | *dest++ = *pt++; | |
1274 | key += val; | |
1275 | } | |
1276 | ||
1277 | /* | |
1278 | * return the result mod the table size | |
1279 | */ | |
1280 | return(key % tabsz); | |
1281 | } |